diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index e350082fe..d95542015 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -785,6 +785,20 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch) return false; } +static bool +any_ctrlr_may_become_available(struct nvme_bdev_channel *nbdev_ch) +{ + struct nvme_io_path *io_path; + + STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) { + if (!nvme_io_path_is_failed(io_path)) { + return true; + } + } + + return false; +} + static int bdev_nvme_retry_ios(void *arg) { @@ -949,6 +963,7 @@ static inline void bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc) { struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); + struct nvme_bdev_channel *nbdev_ch; enum spdk_bdev_io_status io_status; switch (rc) { @@ -958,6 +973,15 @@ bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc) case -ENOMEM: io_status = SPDK_BDEV_IO_STATUS_NOMEM; break; + case -ENXIO: + nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io)); + + if (any_ctrlr_may_become_available(nbdev_ch)) { + bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL); + return; + } + + /* fallthrough */ default: io_status = SPDK_BDEV_IO_STATUS_FAILED; break; @@ -4632,7 +4656,13 @@ bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) { nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(io_path->ctrlr_ch); - if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) { + /* When resetting a ctrlr, its adminq is disconnected first. + * spdk_nvme_ctrlr_cmd_admin_raw() returns -ENXIO if the ctrlr is + * failed or its adminq is disconnected. We should skip any ctrlr + * which is failed or resetting rather than checking if the return + * value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO. + */ + if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr) || nvme_ctrlr->resetting) { continue; } diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 027d13775..700227265 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -4665,6 +4665,120 @@ test_retry_io_for_ana_error(void) g_opts.bdev_retry_count = 0; } +static void +test_retry_admin_passthru_if_ctrlr_is_resetting(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct spdk_bdev_io *admin_io; + struct spdk_io_channel *ch; + struct nvme_bdev_channel *nbdev_ch; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL); + admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + nbdev_ch = spdk_io_channel_get_ctx(ch); + + admin_io->internal.ch = (struct spdk_bdev_channel *)ch; + + /* If ctrlr is available, admin passthrough should succeed. */ + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS); + + /* If ctrlr is resetting, admin passthrough request should be queued + * if it is submitted while resetting ctrlr. + */ + bdev_nvme_reset(nvme_ctrlr); + + poll_thread_times(0, 1); + + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(admin_io->internal.in_submit_request == true); + CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + + spdk_delay_us(1000000); + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list)); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + free(admin_io); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL); +} + int main(int argc, const char **argv) { @@ -4703,6 +4817,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_retry_io_count); CU_ADD_TEST(suite, test_concurrent_read_ana_log_page); CU_ADD_TEST(suite, test_retry_io_for_ana_error); + CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting); CU_basic_set_mode(CU_BRM_VERBOSE);