From 257fcb735228584d9ba1c75f8aeef1399b092b5e Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Wed, 14 Oct 2020 07:09:28 +0900 Subject: [PATCH] lib/nvme: Make nvme_ctrlr_shutdown() asynchronous This patch is the first of the patch series to make spdk_nvme_detach() asynchronous. We have lengthy shutdown notification, i.e., we have to wait a long time until shutdown processing is completed, in some SSDs. If the running system has many such SSDs, we see large intolerable delay. SPDK provides a controller option, no_shn_notification as a workaround. We can use the workaround if the use case of the detach is to switch to the next application without system reboot. However, we cannot use the workaround if we want to do system reboot after detach. To mitigate such lengthy shutdown notification, we need to parallelize detachment among SSDs. Hence the patch series will introduce an asynchronous detach API and will use the API to parallelize detachment. This patch adds the following changes. Introduce a context structure and separate nvme_ctrlr_shutdown() itno nvme_ctrlr_shutdown_async() and nvme_ctrlr_shutdown_poll_async() using the context structure. Name the context structure as nvme_ctrlr_detach_ctx because it will be used only in internal APIs. The upcoming public APIs will support multiple detachment and will have the contest structure named as spdk_nvme_detach_ctx. Use TSC instead of counter because polling interval will be controlled by the caller. Use the convenient macro, SPDK_CEIL_DIV(), to round off the time value in milliseconds. Signed-off-by: Shuhei Matsumoto Change-Id: I9e2355fd24b6d6a4d6c1813577d53822304d4f33 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4414 Reviewed-by: Jacek Kalwas Reviewed-by: Aleksey Marchuk Reviewed-by: Jim Harris Reviewed-by: Ben Walker Tested-by: SPDK CI Jenkins --- lib/nvme/nvme_ctrlr.c | 65 ++++++++++++++++++++++++++-------------- lib/nvme/nvme_internal.h | 5 ++++ 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 45963e63f..bba6d8340 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -938,12 +938,10 @@ spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr) } static void -nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr) +nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr, + struct nvme_ctrlr_detach_ctx *ctx) { union spdk_nvme_cc_register cc; - union spdk_nvme_csts_register csts; - uint32_t ms_waited = 0; - uint32_t shutdown_timeout_ms; if (ctrlr->is_removed) { return; @@ -970,31 +968,45 @@ nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr) * wait before proceeding. */ SPDK_DEBUGLOG(nvme, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); - shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000; - shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000); - SPDK_DEBUGLOG(nvme, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms); + ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000); + ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000); + SPDK_DEBUGLOG(nvme, "shutdown timeout = %" PRIu32 " ms\n", + ctx->shutdown_timeout_ms); - do { - if (nvme_ctrlr_get_csts(ctrlr, &csts)) { - SPDK_ERRLOG("ctrlr %s get_csts() failed\n", ctrlr->trid.traddr); - return; - } + ctx->shutdown_start_tsc = spdk_get_ticks(); +} - if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { - SPDK_DEBUGLOG(nvme, "ctrlr %s shutdown complete in %u milliseconds\n", - ctrlr->trid.traddr, ms_waited); - return; - } +static int +nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr, + struct nvme_ctrlr_detach_ctx *ctx) +{ + union spdk_nvme_csts_register csts; + uint32_t ms_waited; - nvme_delay(1000); - ms_waited++; - } while (ms_waited < shutdown_timeout_ms); + ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz(); + + if (nvme_ctrlr_get_csts(ctrlr, &csts)) { + SPDK_ERRLOG("ctrlr %s get_csts() failed\n", ctrlr->trid.traddr); + return -EIO; + } + + if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { + SPDK_DEBUGLOG(nvme, "ctrlr %s shutdown complete in %u milliseconds\n", + ctrlr->trid.traddr, ms_waited); + return 0; + } + + if (ms_waited < ctx->shutdown_timeout_ms) { + return -EAGAIN; + } SPDK_ERRLOG("ctrlr %s did not shutdown within %u milliseconds\n", - ctrlr->trid.traddr, shutdown_timeout_ms); + ctrlr->trid.traddr, ctx->shutdown_timeout_ms); if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) { SPDK_ERRLOG("likely due to shutdown handling in the VMWare emulated NVMe SSD\n"); } + + return 0; } static int @@ -3260,6 +3272,8 @@ void nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) { struct spdk_nvme_qpair *qpair, *tmp; + struct nvme_ctrlr_detach_ctx ctx = {}; + int rc; SPDK_DEBUGLOG(nvme, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr); @@ -3282,7 +3296,14 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) ctrlr->trid.traddr); nvme_ctrlr_disable(ctrlr); } else { - nvme_ctrlr_shutdown(ctrlr); + nvme_ctrlr_shutdown_async(ctrlr, &ctx); + while (1) { + rc = nvme_ctrlr_shutdown_poll_async(ctrlr, &ctx); + if (rc != -EAGAIN) { + break; + } + nvme_delay(1000); + } } nvme_ctrlr_destruct_namespaces(ctrlr); diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index 8a946e367..7dd81ebca 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -843,6 +843,11 @@ struct spdk_nvme_probe_ctx { TAILQ_HEAD(, spdk_nvme_ctrlr) init_ctrlrs; }; +struct nvme_ctrlr_detach_ctx { + uint64_t shutdown_start_tsc; + uint32_t shutdown_timeout_ms; +}; + struct nvme_driver { pthread_mutex_t lock;