From d3b788e9ab773daf34c994083e7bed85f4e15fa2 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Tue, 27 Oct 2020 11:08:26 -0700 Subject: [PATCH] nvme: continue probing ctrlrs even if one fails It is possible that a single probe_ctx could be used to probe multiple newly attached nvme controllers. If one of those controllers is removed during this process, the rest of the controllers do not get probed and can even get stuck in a zombie state. It is better to just continue with probing the rest of the controllers. Fixes issue #1611. Signed-off-by: Jim Harris Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4945 (master) (cherry picked from commit ddf86600bb1675d4fc501e70ea51b861369f50e8) Change-Id: I4156ee8b50e8d52cfeee7224f210a58bb773e939 Signed-off-by: Tomasz Zawadzki Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4958 Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- include/spdk/nvme.h | 12 +++++++++--- lib/nvme/nvme.c | 22 +++++++--------------- test/unit/lib/nvme/nvme.c/nvme_ut.c | 2 +- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index e3b2d0c4e..68867ec06 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -773,9 +773,16 @@ struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transpo spdk_nvme_remove_cb remove_cb); /** - * Start controllers in the context list. + * Proceed with attaching contollers associated with the probe context. * - * Users may call the function util it returns True. + * The probe context is one returned from a previous call to + * spdk_nvme_probe_async(). Users must call this function on the + * probe context until it returns 0. + * + * If any controllers fail to attach, there is no explicit notification. + * Users can detect attachment failure by comparing attach_cb invocations + * with the number of times where the user returned true for the + * probe_cb. * * \param probe_ctx Context used to track probe actions. * @@ -783,7 +790,6 @@ struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transpo * is also freed and no longer valid. * \return -EAGAIN if there are still pending probe operations; user must call * spdk_nvme_probe_poll_async again to continue progress. - * \return value other than 0 and -EAGAIN probe error with one controller. */ int spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx); diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c index 830d9a0c4..7aa75e7a3 100644 --- a/lib/nvme/nvme.c +++ b/lib/nvme/nvme.c @@ -688,11 +688,11 @@ nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid, return 1; } -static int +static void nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_probe_ctx *probe_ctx) { - int rc = 0; + int rc = 0; rc = nvme_ctrlr_process_init(ctrlr); @@ -704,11 +704,11 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr, nvme_ctrlr_fail(ctrlr, false); nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); nvme_ctrlr_destruct(ctrlr); - return rc; + return; } if (ctrlr->state != NVME_CTRLR_STATE_READY) { - return 0; + return; } STAILQ_INIT(&ctrlr->io_producers); @@ -735,10 +735,7 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr, if (probe_ctx->attach_cb) { probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts); - return 0; } - - return 0; } static int @@ -1545,7 +1542,6 @@ spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid, int spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx) { - int rc = 0; struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp; if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { @@ -1554,19 +1550,15 @@ spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx) } TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) { - rc = nvme_ctrlr_poll_internal(ctrlr, probe_ctx); - if (rc != 0) { - rc = -EIO; - break; - } + nvme_ctrlr_poll_internal(ctrlr, probe_ctx); } - if (rc != 0 || TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) { + if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) { nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); g_spdk_nvme_driver->initialized = true; nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); free(probe_ctx); - return rc; + return 0; } return -EAGAIN; diff --git a/test/unit/lib/nvme/nvme.c/nvme_ut.c b/test/unit/lib/nvme/nvme.c/nvme_ut.c index f1d080875..c0549f6e7 100644 --- a/test/unit/lib/nvme/nvme.c/nvme_ut.c +++ b/test/unit/lib/nvme/nvme.c/nvme_ut.c @@ -382,7 +382,7 @@ test_nvme_init_controllers(void) probe_ctx->attach_cb = attach_cb; probe_ctx->trid.trtype = SPDK_NVME_TRANSPORT_PCIE; rc = nvme_init_controllers(probe_ctx); - CU_ASSERT(rc != 0); + CU_ASSERT(rc == 0); CU_ASSERT(g_spdk_nvme_driver->initialized == true); CU_ASSERT(ut_destruct_called == true);