From b4447abf706fc176e8cc54f12a041aba1afd52a0 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Mon, 15 Nov 2021 13:07:57 +0900 Subject: [PATCH] bdev/nvme: Retry failed admin passthru up to retry_count times This patch supports admin passthrough retry when we get any error with DNR=0 but ABORTED_BY_REQUEST up to retry_count times. Signed-off-by: Shuhei Matsumoto Change-Id: I1bf29570791fdbe8651fa70c4c8685bb740fb86b Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9944 Community-CI: Mellanox Build Bot Community-CI: Broadcom CI Reviewed-by: Ben Walker Reviewed-by: Aleksey Marchuk Tested-by: SPDK CI Jenkins --- module/bdev/nvme/bdev_nvme.c | 27 +++- .../lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c | 125 ++++++++++++++++++ 2 files changed, 148 insertions(+), 4 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 29005547f..90c756a7f 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -987,6 +987,7 @@ bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc) break; } + bio->retry_count = 0; spdk_bdev_io_complete(bdev_io, io_status); } @@ -4156,6 +4157,8 @@ bdev_nvme_admin_passthru_complete_nvme_status(void *ctx) const struct spdk_nvme_cpl *cpl = &bio->cpl; struct nvme_bdev_channel *nbdev_ch; struct nvme_ctrlr *nvme_ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + uint64_t delay_ms; assert(bdev_nvme_io_type_is_admin(bdev_io->type)); @@ -4163,7 +4166,8 @@ bdev_nvme_admin_passthru_complete_nvme_status(void *ctx) goto complete; } - if (cpl->status.dnr != 0) { + if (cpl->status.dnr != 0 || (g_opts.bdev_retry_count != -1 && + bio->retry_count >= g_opts.bdev_retry_count)) { goto complete; } @@ -4174,13 +4178,28 @@ bdev_nvme_admin_passthru_complete_nvme_status(void *ctx) spdk_nvme_cpl_is_aborted_sq_deletion(cpl) || spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr) || nvme_ctrlr->resetting) { - if (any_ctrlr_may_become_available(nbdev_ch)) { - bdev_nvme_queue_retry_io(nbdev_ch, bio, 0); - return; + delay_ms = 0; + } else if (spdk_nvme_cpl_is_aborted_by_request(cpl)) { + goto complete; + } else { + bio->retry_count++; + + cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr); + + if (cpl->status.crd != 0) { + delay_ms = cdata->crdt[cpl->status.crd] * 100; + } else { + delay_ms = 0; } } + if (any_ctrlr_may_become_available(nbdev_ch)) { + bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms); + return; + } + complete: + bio->retry_count = 0; spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); } diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 7dfa8a73a..8b9727b9e 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -4683,6 +4683,8 @@ test_retry_admin_passthru_if_ctrlr_is_resetting(void) memset(attached_names, 0, sizeof(char *) * STRING_SIZE); ut_init_trid(&path.trid); + g_opts.bdev_retry_count = 1; + set_thread(0); ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); @@ -4777,6 +4779,8 @@ test_retry_admin_passthru_if_ctrlr_is_resetting(void) poll_threads(); CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL); + + g_opts.bdev_retry_count = 0; } static void @@ -4957,6 +4961,126 @@ test_retry_admin_passthru_for_path_error(void) g_opts.bdev_retry_count = 0; } +static void +test_retry_admin_passthru_by_count(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct spdk_bdev_io *admin_io; + struct nvme_bdev_io *admin_bio; + struct spdk_io_channel *ch; + struct ut_nvme_req *req; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL); + admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + + admin_bio = (struct nvme_bdev_io *)admin_io->driver_ctx; + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + admin_io->internal.ch = (struct spdk_bdev_channel *)ch; + + /* If admin passthrough is aborted by request, it should not be retried. */ + g_opts.bdev_retry_count = 1; + + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + + req = ut_get_outstanding_nvme_request(&ctrlr->adminq, admin_bio); + SPDK_CU_ASSERT_FATAL(req != NULL); + + req->cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; + req->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_thread_times(0, 2); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED); + + /* If bio->retry_count is not less than g_opts.bdev_retry_count, + * the failed admin passthrough should not be retried. + */ + g_opts.bdev_retry_count = 4; + + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + + req = ut_get_outstanding_nvme_request(&ctrlr->adminq, admin_bio); + SPDK_CU_ASSERT_FATAL(req != NULL); + + req->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + req->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + admin_bio->retry_count = 4; + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_thread_times(0, 2); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR); + + free(admin_io); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL); + + g_opts.bdev_retry_count = 0; +} + int main(int argc, const char **argv) { @@ -4997,6 +5121,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_retry_io_for_ana_error); CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting); CU_ADD_TEST(suite, test_retry_admin_passthru_for_path_error); + CU_ADD_TEST(suite, test_retry_admin_passthru_by_count); CU_basic_set_mode(CU_BRM_VERBOSE);