From da5e62782fbb77a9efddedfa460a7f588366721b Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Thu, 7 Jan 2021 00:41:32 -0700 Subject: [PATCH] bdev/nvme: detect failed async probe Track whether we attached a controller while polling the probe ctx. If we didn't when the probe ctx is done, then it means the controller failed to attach and we need to free the ctx. Fixes issue #1723. Signed-off-by: Jim Harris Change-Id: Ia7f040a073e4c824c29c0ed493f8391b69f94174 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5818 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Changpeng Liu Reviewed-by: Reviewed-by: Shuhei Matsumoto --- module/bdev/nvme/bdev_nvme.c | 30 +++++++++++++++++++++++++----- module/bdev/nvme/common.h | 3 +++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index a8e227959..dd861aa65 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -1797,7 +1797,14 @@ populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, size_t count, int rc) ctx->cb_fn(ctx->cb_ctx, count, rc); } - free(ctx); + ctx->namespaces_populated = true; + if (ctx->probe_done) { + /* The probe was already completed, so we need to free the context + * here. This can happen for cases like OCSSD, where we need to + * send additional commands to the SSD after attach. + */ + free(ctx); + } } static void @@ -1944,8 +1951,7 @@ connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, int rc; ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts); - - spdk_poller_unregister(&ctx->poller); + ctx->ctrlr_attached = true; nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(ctx->base_name); if (nvme_bdev_ctrlr) { @@ -1982,9 +1988,23 @@ bdev_nvme_async_poll(void *arg) int rc; rc = spdk_nvme_probe_poll_async(ctx->probe_ctx); - if (spdk_unlikely(rc != -EAGAIN && rc != 0)) { + if (spdk_unlikely(rc != -EAGAIN)) { + ctx->probe_done = true; spdk_poller_unregister(&ctx->poller); - free(ctx); + if (!ctx->ctrlr_attached) { + /* The probe is done, but no controller was attached. + * That means we had a failure, so report -EIO back to + * the caller (usually the RPC). populate_namespaces_cb() + * will take care of freeing the nvme_async_probe_ctx. + */ + populate_namespaces_cb(ctx, 0, -EIO); + } else if (ctx->namespaces_populated) { + /* The namespaces for the attached controller were all + * populated and the response was already sent to the + * caller (usually the RPC). So free the context here. + */ + free(ctx); + } } return SPDK_POLLER_BUSY; diff --git a/module/bdev/nvme/common.h b/module/bdev/nvme/common.h index e3ce2701a..b7555d5ad 100644 --- a/module/bdev/nvme/common.h +++ b/module/bdev/nvme/common.h @@ -143,6 +143,9 @@ struct nvme_async_probe_ctx { spdk_bdev_create_nvme_fn cb_fn; void *cb_ctx; uint32_t populates_in_progress; + bool ctrlr_attached; + bool probe_done; + bool namespaces_populated; }; struct ocssd_io_channel;