bdev/nvme: Handle ANA transition (change or inaccessible state) correctly
Previously, if a namespace is in ANA inaccessible state, I/O had been queued infinitely. Fix this issue according to the NVMe spec. Add a temporary poller anatt_timer and a flag ana_transition_timedout for each nvme_ns. Start anatt_timer if the nvme_ns enters ANA transition. If anatt_timer is expired, set ana_transition_timedout to true. Cancel anatt_timer or clear ana_transition_timedout if the nvme_ns exits ANA transition. nvme_io_path_become_available() returns false if ana_transition_timedout is true. Add unit test case to verify these addition. Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Change-Id: Ic76933242046b3e8e553de88221b943ad097c91c Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12194 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Monica Kenguva <monica.kenguva@intel.com>
This commit is contained in:
parent
da2fc15f2a
commit
13ca6e52d3
@ -875,6 +875,10 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
|
||||
struct nvme_io_path *io_path;
|
||||
|
||||
STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
|
||||
if (io_path->nvme_ns->ana_transition_timedout) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nvme_io_path_is_connected(io_path) ||
|
||||
!nvme_io_path_is_failed(io_path)) {
|
||||
return true;
|
||||
@ -2624,13 +2628,48 @@ bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int
|
||||
nvme_ns_ana_transition_timedout(void *ctx)
|
||||
{
|
||||
struct nvme_ns *nvme_ns = ctx;
|
||||
|
||||
spdk_poller_unregister(&nvme_ns->anatt_timer);
|
||||
nvme_ns->ana_transition_timedout = true;
|
||||
|
||||
return SPDK_POLLER_BUSY;
|
||||
}
|
||||
|
||||
static void
|
||||
_nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
|
||||
const struct spdk_nvme_ana_group_descriptor *desc)
|
||||
{
|
||||
const struct spdk_nvme_ctrlr_data *cdata;
|
||||
|
||||
nvme_ns->ana_group_id = desc->ana_group_id;
|
||||
nvme_ns->ana_state = desc->ana_state;
|
||||
nvme_ns->ana_state_updating = false;
|
||||
|
||||
switch (nvme_ns->ana_state) {
|
||||
case SPDK_NVME_ANA_OPTIMIZED_STATE:
|
||||
case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
|
||||
nvme_ns->ana_transition_timedout = false;
|
||||
spdk_poller_unregister(&nvme_ns->anatt_timer);
|
||||
break;
|
||||
|
||||
case SPDK_NVME_ANA_INACCESSIBLE_STATE:
|
||||
case SPDK_NVME_ANA_CHANGE_STATE:
|
||||
if (nvme_ns->anatt_timer != NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
|
||||
nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
|
||||
nvme_ns,
|
||||
cdata->anatt * SPDK_SEC_TO_USEC);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
@ -3114,6 +3153,8 @@ nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *n
|
||||
{
|
||||
struct nvme_bdev *bdev;
|
||||
|
||||
spdk_poller_unregister(&nvme_ns->anatt_timer);
|
||||
|
||||
bdev = nvme_ns->bdev;
|
||||
if (bdev != NULL) {
|
||||
pthread_mutex_lock(&bdev->mutex);
|
||||
|
@ -85,6 +85,8 @@ struct nvme_ns {
|
||||
uint32_t ana_group_id;
|
||||
enum spdk_nvme_ana_state ana_state;
|
||||
bool ana_state_updating;
|
||||
bool ana_transition_timedout;
|
||||
struct spdk_poller *anatt_timer;
|
||||
struct nvme_async_probe_ctx *probe_ctx;
|
||||
TAILQ_ENTRY(nvme_ns) tailq;
|
||||
RB_ENTRY(nvme_ns) node;
|
||||
|
@ -5930,6 +5930,63 @@ test_nvme_ns_cmp(void)
|
||||
CU_ASSERT(nvme_ns_cmp(&nvme_ns2, &nvme_ns1) > 0);
|
||||
}
|
||||
|
||||
static void
|
||||
test_ana_transition(void)
|
||||
{
|
||||
struct spdk_nvme_ctrlr ctrlr = { .cdata.anatt = 10, };
|
||||
struct nvme_ctrlr nvme_ctrlr = { .ctrlr = &ctrlr, };
|
||||
struct nvme_ns nvme_ns = { .ctrlr = &nvme_ctrlr, };
|
||||
struct spdk_nvme_ana_group_descriptor desc = { .ana_group_id = 1, };
|
||||
|
||||
/* case 1: ANA transition timedout is canceled. */
|
||||
nvme_ns.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
|
||||
nvme_ns.ana_transition_timedout = true;
|
||||
|
||||
desc.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
|
||||
|
||||
_nvme_ns_set_ana_state(&nvme_ns, &desc);
|
||||
|
||||
CU_ASSERT(nvme_ns.ana_transition_timedout == false);
|
||||
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE);
|
||||
|
||||
/* case 2: ANATT timer is kept. */
|
||||
nvme_ns.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
|
||||
nvme_ns.anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
|
||||
&nvme_ns,
|
||||
ctrlr.cdata.anatt * SPDK_SEC_TO_USEC);
|
||||
|
||||
desc.ana_state = SPDK_NVME_ANA_INACCESSIBLE_STATE;
|
||||
|
||||
_nvme_ns_set_ana_state(&nvme_ns, &desc);
|
||||
|
||||
CU_ASSERT(nvme_ns.anatt_timer != NULL);
|
||||
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE);
|
||||
|
||||
/* case 3: ANATT timer is stopped. */
|
||||
desc.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
|
||||
|
||||
_nvme_ns_set_ana_state(&nvme_ns, &desc);
|
||||
|
||||
CU_ASSERT(nvme_ns.anatt_timer == NULL);
|
||||
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE);
|
||||
|
||||
/* ANATT timer is started. */
|
||||
desc.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
|
||||
|
||||
_nvme_ns_set_ana_state(&nvme_ns, &desc);
|
||||
|
||||
CU_ASSERT(nvme_ns.anatt_timer != NULL);
|
||||
CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_CHANGE_STATE);
|
||||
|
||||
/* ANATT timer is expired. */
|
||||
spdk_delay_us(ctrlr.cdata.anatt * SPDK_SEC_TO_USEC);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(nvme_ns.anatt_timer == NULL);
|
||||
CU_ASSERT(nvme_ns.ana_transition_timedout == true);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char **argv)
|
||||
{
|
||||
@ -5978,6 +6035,7 @@ main(int argc, const char **argv)
|
||||
CU_ADD_TEST(suite, test_retry_failover_ctrlr);
|
||||
CU_ADD_TEST(suite, test_fail_path);
|
||||
CU_ADD_TEST(suite, test_nvme_ns_cmp);
|
||||
CU_ADD_TEST(suite, test_ana_transition);
|
||||
|
||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user