bdev/nvme: Handle ANA transition (change or inaccessible state) correctly

Previously, if a namespace was in the ANA inaccessible state, I/O was
queued indefinitely. Fix this issue according to the NVMe spec.

Add a temporary poller anatt_timer and a flag ana_transition_timedout for
each nvme_ns.

Start anatt_timer when the nvme_ns enters ANA transition. If anatt_timer
expires, set ana_transition_timedout to true. Cancel anatt_timer and
clear ana_transition_timedout when the nvme_ns exits ANA transition.

any_io_path_may_become_available() skips any io_path whose nvme_ns has
ana_transition_timedout set to true.

Add a unit test case to verify these additions.

Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ic76933242046b3e8e553de88221b943ad097c91c
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12194
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Monica Kenguva <monica.kenguva@intel.com>
This commit is contained in:
Shuhei Matsumoto 2022-04-07 17:29:05 +09:00 committed by Tomasz Zawadzki
parent da2fc15f2a
commit 13ca6e52d3
3 changed files with 101 additions and 0 deletions

View File

@ -875,6 +875,10 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
struct nvme_io_path *io_path;
STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
if (io_path->nvme_ns->ana_transition_timedout) {
continue;
}
if (nvme_io_path_is_connected(io_path) ||
!nvme_io_path_is_failed(io_path)) {
return true;
@ -2624,13 +2628,48 @@ bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
return rc;
}
/*
 * Poller callback for the per-namespace ANA transition timer.
 *
 * ctx is the struct nvme_ns the timer was registered for. The timer is
 * one-shot: it unregisters itself on its first run and then marks the
 * namespace as having timed out of its ANA transition.
 *
 * Always returns SPDK_POLLER_BUSY.
 */
static int
nvme_ns_ana_transition_timedout(void *ctx)
{
	struct nvme_ns *ns = ctx;

	/* Stop the timer first so that it never fires a second time. */
	spdk_poller_unregister(&ns->anatt_timer);

	ns->ana_transition_timedout = true;

	return SPDK_POLLER_BUSY;
}
static void
_nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
const struct spdk_nvme_ana_group_descriptor *desc)
{
const struct spdk_nvme_ctrlr_data *cdata;
nvme_ns->ana_group_id = desc->ana_group_id;
nvme_ns->ana_state = desc->ana_state;
nvme_ns->ana_state_updating = false;
switch (nvme_ns->ana_state) {
case SPDK_NVME_ANA_OPTIMIZED_STATE:
case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
nvme_ns->ana_transition_timedout = false;
spdk_poller_unregister(&nvme_ns->anatt_timer);
break;
case SPDK_NVME_ANA_INACCESSIBLE_STATE:
case SPDK_NVME_ANA_CHANGE_STATE:
if (nvme_ns->anatt_timer != NULL) {
break;
}
cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
nvme_ns,
cdata->anatt * SPDK_SEC_TO_USEC);
break;
default:
break;
}
}
static int
@ -3114,6 +3153,8 @@ nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *n
{
struct nvme_bdev *bdev;
spdk_poller_unregister(&nvme_ns->anatt_timer);
bdev = nvme_ns->bdev;
if (bdev != NULL) {
pthread_mutex_lock(&bdev->mutex);

View File

@ -85,6 +85,8 @@ struct nvme_ns {
uint32_t ana_group_id;
enum spdk_nvme_ana_state ana_state;
bool ana_state_updating;
bool ana_transition_timedout;
struct spdk_poller *anatt_timer;
struct nvme_async_probe_ctx *probe_ctx;
TAILQ_ENTRY(nvme_ns) tailq;
RB_ENTRY(nvme_ns) node;

View File

@ -5930,6 +5930,63 @@ test_nvme_ns_cmp(void)
CU_ASSERT(nvme_ns_cmp(&nvme_ns2, &nvme_ns1) > 0);
}
/*
 * Unit test for _nvme_ns_set_ana_state() and the ANA transition timer.
 *
 * A stack-allocated nvme_ns is wired to a controller whose cdata.anatt is 10,
 * and ANA group descriptors with different states are applied to it. The
 * steps below depend on each other and must run in this order.
 */
static void
test_ana_transition(void)
{
struct spdk_nvme_ctrlr ctrlr = { .cdata.anatt = 10, };
struct nvme_ctrlr nvme_ctrlr = { .ctrlr = &ctrlr, };
struct nvme_ns nvme_ns = { .ctrlr = &nvme_ctrlr, };
struct spdk_nvme_ana_group_descriptor desc = { .ana_group_id = 1, };
/* case 1: A previously recorded ANA transition timeout is cleared when the
 * namespace moves to the optimized state.
 */
nvme_ns.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
nvme_ns.ana_transition_timedout = true;
desc.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
_nvme_ns_set_ana_state(&nvme_ns, &desc);
CU_ASSERT(nvme_ns.ana_transition_timedout == false);
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE);
/* case 2: An ANATT timer that is already registered is kept when the
 * namespace moves from the change state to the inaccessible state.
 */
nvme_ns.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
nvme_ns.anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
&nvme_ns,
ctrlr.cdata.anatt * SPDK_SEC_TO_USEC);
desc.ana_state = SPDK_NVME_ANA_INACCESSIBLE_STATE;
_nvme_ns_set_ana_state(&nvme_ns, &desc);
CU_ASSERT(nvme_ns.anatt_timer != NULL);
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE);
/* case 3: The ANATT timer is stopped when the namespace moves to the
 * optimized state.
 */
desc.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
_nvme_ns_set_ana_state(&nvme_ns, &desc);
CU_ASSERT(nvme_ns.anatt_timer == NULL);
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE);
/* case 4: With no timer registered, entering the change state starts the
 * ANATT timer.
 */
desc.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
_nvme_ns_set_ana_state(&nvme_ns, &desc);
CU_ASSERT(nvme_ns.anatt_timer != NULL);
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_CHANGE_STATE);
/* case 5: After delaying for the ANATT period and polling the threads, the
 * timer is expected to have fired: it has unregistered itself and set the
 * timed-out flag.
 */
spdk_delay_us(ctrlr.cdata.anatt * SPDK_SEC_TO_USEC);
poll_threads();
CU_ASSERT(nvme_ns.anatt_timer == NULL);
CU_ASSERT(nvme_ns.ana_transition_timedout == true);
}
int
main(int argc, const char **argv)
{
@ -5978,6 +6035,7 @@ main(int argc, const char **argv)
CU_ADD_TEST(suite, test_retry_failover_ctrlr);
CU_ADD_TEST(suite, test_fail_path);
CU_ADD_TEST(suite, test_nvme_ns_cmp);
CU_ADD_TEST(suite, test_ana_transition);
CU_basic_set_mode(CU_BRM_VERBOSE);