From 84ac18e54513dfa52b03c2dd5b77a221ca4ec4f7 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Tue, 19 Oct 2021 07:01:30 +0900 Subject: [PATCH] bdev/nvme: Update ANA state if I/O failed by ANA error If I/O got ANA error, ANA state may be out of date. So in this case read ANA log page and update ANA states. Mark nvme_ns to be updating to avoid using while updating ANA state. Signed-off-by: Shuhei Matsumoto Change-Id: Ia43d38b3a589c84d6d0479dedcced033e76fb194 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9458 Tested-by: SPDK CI Jenkins Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker Reviewed-by: Aleksey Marchuk --- module/bdev/nvme/bdev_nvme.c | 16 +- module/bdev/nvme/bdev_nvme.h | 1 + .../lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c | 150 ++++++++++++++++++ 3 files changed, 166 insertions(+), 1 deletion(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index a9932c68a..cbdb07fdc 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -200,6 +200,7 @@ static void bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_b static int bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr); static int bdev_nvme_failover(struct nvme_ctrlr *nvme_ctrlr, bool remove); static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); +static void nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr); static int nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2) @@ -671,6 +672,10 @@ bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type) static inline bool nvme_ns_is_accessible(struct nvme_ns *nvme_ns) { + if (spdk_unlikely(nvme_ns->ana_state_updating)) { + return false; + } + switch (nvme_ns->ana_state) { case SPDK_NVME_ANA_OPTIMIZED_STATE: case SPDK_NVME_ANA_NON_OPTIMIZED_STATE: @@ -723,6 +728,10 @@ bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch) continue; } + if (spdk_unlikely(io_path->nvme_ns->ana_state_updating)) { + continue; + } + switch (io_path->nvme_ns->ana_state) { case SPDK_NVME_ANA_OPTIMIZED_STATE: return io_path; @@ -851,6 +860,7 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, } nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io)); + nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(bio->io_path->ctrlr_ch); assert(bio->io_path != NULL); @@ -858,13 +868,16 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, spdk_nvme_cpl_is_aborted_sq_deletion(cpl) || !nvme_io_path_is_available(bio->io_path) || nvme_io_path_is_failed(bio->io_path)) { + if (spdk_nvme_cpl_is_ana_error(cpl)) { + bio->io_path->nvme_ns->ana_state_updating = true; + nvme_ctrlr_read_ana_log_page(nvme_ctrlr); + } delay_ms = 0; } else if (spdk_nvme_cpl_is_aborted_by_request(cpl)) { goto complete; } else { bio->retry_count++; - nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(bio->io_path->ctrlr_ch); cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr); if (cpl->status.crd != 0) { @@ -2741,6 +2754,7 @@ nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, nvme_ns->ana_group_id = desc->ana_group_id; nvme_ns->ana_state = desc->ana_state; + nvme_ns->ana_state_updating = false; } return 0; diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index 0b2050f5d..ba06cd77a 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -73,6 +73,7 @@ struct nvme_ns { struct nvme_bdev *bdev; uint32_t ana_group_id; enum spdk_nvme_ana_state ana_state; + bool ana_state_updating; struct nvme_async_probe_ctx *probe_ctx; TAILQ_ENTRY(nvme_ns) tailq; RB_ENTRY(nvme_ns) node; diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index f36cde08c..b3c6d1c10 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -4476,6 +4476,155 @@ test_concurrent_read_ana_log_page(void) CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL); } +static void +test_retry_io_for_ana_error(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct nvme_ns *nvme_ns; + struct spdk_bdev_io *bdev_io; + struct nvme_bdev_io *bio; + struct spdk_io_channel *ch; + struct nvme_bdev_channel *nbdev_ch; + struct nvme_io_path *io_path; + struct nvme_ctrlr_channel *ctrlr_ch; + struct ut_nvme_req *req; + uint64_t now; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + g_opts.bdev_retry_count = 1; + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, true, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); + CU_ASSERT(nvme_ns != NULL); + + bdev_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL); + ut_bdev_io_set_buf(bdev_io); + + bio = (struct nvme_bdev_io *)bdev_io->driver_ctx; + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + nbdev_ch = spdk_io_channel_get_ctx(ch); + + io_path = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr); + SPDK_CU_ASSERT_FATAL(io_path != NULL); + + ctrlr_ch = io_path->ctrlr_ch; + SPDK_CU_ASSERT_FATAL(ctrlr_ch != NULL); + SPDK_CU_ASSERT_FATAL(ctrlr_ch->qpair != NULL); + + now = spdk_get_ticks(); + + bdev_io->internal.ch = (struct spdk_bdev_channel *)ch; + + /* If I/O got ANA error, it should be queued, the corresponding namespace + * should be freezed and its ANA state should be updated. + */ + bdev_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + + req = ut_get_outstanding_nvme_request(ctrlr_ch->qpair, bio); + SPDK_CU_ASSERT_FATAL(req != NULL); + + nvme_ns->ana_state = SPDK_NVME_ANA_INACCESSIBLE_STATE; + req->cpl.status.sc = SPDK_NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE; + req->cpl.status.sct = SPDK_NVME_SCT_PATH; + + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + /* I/O should be retried immediately. */ + CU_ASSERT(bio->retry_ticks == now); + CU_ASSERT(nvme_ns->ana_state_updating == true); + CU_ASSERT(nvme_ctrlr->ana_log_page_updating == true); + + poll_threads(); + + /* Namespace is inaccessible, and hence I/O should be queued again. */ + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + /* I/O should be retried after a second if no I/O path was found but + * any I/O path may become available. + */ + CU_ASSERT(bio->retry_ticks == now + spdk_get_ticks_hz()); + + /* Namespace should be unfreezed after completing to update its ANA state. */ + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(nvme_ns->ana_state_updating == false); + CU_ASSERT(nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE); + CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false); + + /* Retry the queued I/O should succeed. */ + spdk_delay_us(spdk_get_ticks_hz() - g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == false); + CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + free(bdev_io); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_bdev_ctrlr_get("nvme0") == NULL); + + g_opts.bdev_retry_count = 0; +} + int main(int argc, const char **argv) { @@ -4513,6 +4662,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_retry_io_for_io_path_error); CU_ADD_TEST(suite, test_retry_io_count); CU_ADD_TEST(suite, test_concurrent_read_ana_log_page); + CU_ADD_TEST(suite, test_retry_io_for_ana_error); CU_basic_set_mode(CU_BRM_VERBOSE);