From 35a2f4e22e23b38062bfbc70ced5f2417febe5f3 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Fri, 19 Nov 2021 02:29:28 +0900 Subject: [PATCH] bdev/nvme: Retry admin passthru a second later if any ctrlr may become available When resetting ctrlr, adminq is disconnected first. If adminq is disconnected, admin passthrough request is rejected with -ENXIO. But resetting ctrlr may succeed. If resetting ctrlr succeeds, adminq is connected again, and admin passthrough request will be submitted successfully. On the other hand, if ctrlr is failed, admin passthrough request is rejected with -ENXIO. But when resetting ctrlr, ctrlr is set to unfailed. Hence bdev_nvme_admin_passthru() skips any ctrlr which is resetting or failed, and calls bdev_nvme_admin_passthru_complete() with -ENXIO if no available ctrlr is found. bdev_nvme_admin_passthru_complete() queues admin passthrough request and retry it one second later if ctrlr is resetting. Signed-off-by: Shuhei Matsumoto Change-Id: Ic748dc4faf29ebf717ae5c29dcf7c55fe2ea9243 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9942 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Aleksey Marchuk --- module/bdev/nvme/bdev_nvme.c | 32 ++++- .../lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c | 115 ++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index e350082fe..d95542015 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -785,6 +785,20 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch) return false; } +static bool +any_ctrlr_may_become_available(struct nvme_bdev_channel *nbdev_ch) +{ + struct nvme_io_path *io_path; + + STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) { + if (!nvme_io_path_is_failed(io_path)) { + return true; + } + } + + return false; +} + static int bdev_nvme_retry_ios(void *arg) { @@ -949,6 +963,7 @@ static inline void bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc) { struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); + struct nvme_bdev_channel *nbdev_ch; enum spdk_bdev_io_status io_status; switch (rc) { @@ -958,6 +973,15 @@ bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc) case -ENOMEM: io_status = SPDK_BDEV_IO_STATUS_NOMEM; break; + case -ENXIO: + nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io)); + + if (any_ctrlr_may_become_available(nbdev_ch)) { + bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL); + return; + } + + /* fallthrough */ default: io_status = SPDK_BDEV_IO_STATUS_FAILED; break; @@ -4632,7 +4656,13 @@ bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) { nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(io_path->ctrlr_ch); - if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) { + /* When resetting a ctrlr, its adminq is disconnected first. + * spdk_nvme_ctrlr_cmd_admin_raw() returns -ENXIO if the ctrlr is + * failed or its adminq is disconnected. We should skip any ctrlr + * which is failed or resetting rather than checking if the return + * value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO. + */ + if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr) || nvme_ctrlr->resetting) { continue; } diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 027d13775..700227265 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -4665,6 +4665,120 @@ test_retry_io_for_ana_error(void) g_opts.bdev_retry_count = 0; } +static void +test_retry_admin_passthru_if_ctrlr_is_resetting(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct spdk_bdev_io *admin_io; + struct spdk_io_channel *ch; + struct nvme_bdev_channel *nbdev_ch; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL); + admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + nbdev_ch = spdk_io_channel_get_ctx(ch); + + admin_io->internal.ch = (struct spdk_bdev_channel *)ch; + + /* If ctrlr is available, admin passthrough should succeed. */ + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS); + + /* If ctrlr is resetting, admin passthrough request should be queued + * if it is submitted while resetting ctrlr. + */ + bdev_nvme_reset(nvme_ctrlr); + + poll_thread_times(0, 1); + + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(admin_io->internal.in_submit_request == true); + CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + + spdk_delay_us(1000000); + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list)); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + free(admin_io); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL); +} + int main(int argc, const char **argv) { @@ -4703,6 +4817,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_retry_io_count); CU_ADD_TEST(suite, test_concurrent_read_ana_log_page); CU_ADD_TEST(suite, test_retry_io_for_ana_error); + CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting); CU_basic_set_mode(CU_BRM_VERBOSE);