lib/nvme: Make nvme_ctrlr_shutdown() asynchronous

This patch is the first of the patch series to make spdk_nvme_detach()
asynchronous.

We have lengthy shutdown notification, i.e., we have to wait a long time
until shutdown processing is completed, in some SSDs. If the running system
has many such SSDs, we see large intolerable delay.

SPDK provides a controller option, no_shn_notification as a workaround.
We can use the workaround if the use case of the detach is to switch to
the next application without system reboot. However, we cannot use the
workaround if we want to do system reboot after detach.

To mitigate such lengthy shutdown notification, we need to parallelize
detachment among SSDs.

Hence the patch series will introduce an asynchronous detach API and
will use the API to parallelize detachment.

This patch adds the following changes.

Introduce a context structure and separate nvme_ctrlr_shutdown()
itno nvme_ctrlr_shutdown_async() and nvme_ctrlr_shutdown_poll_async()
using the context structure.

Name the context structure as nvme_ctrlr_detach_ctx because it will be
used only in internal APIs. The upcoming public APIs will support
multiple detachment and will have the contest structure named as
spdk_nvme_detach_ctx.

Use TSC instead of counter because polling interval will be controlled
by the caller.

Use the convenient macro, SPDK_CEIL_DIV(), to round off the time
value in milliseconds.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: I9e2355fd24b6d6a4d6c1813577d53822304d4f33
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4414
Reviewed-by: Jacek Kalwas <jacek.kalwas@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Shuhei Matsumoto 2020-10-14 07:09:28 +09:00 committed by Jim Harris
parent c865839736
commit 257fcb7352
2 changed files with 48 additions and 22 deletions

View File

@ -938,12 +938,10 @@ spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
} }
static void static void
nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr) nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr,
struct nvme_ctrlr_detach_ctx *ctx)
{ {
union spdk_nvme_cc_register cc; union spdk_nvme_cc_register cc;
union spdk_nvme_csts_register csts;
uint32_t ms_waited = 0;
uint32_t shutdown_timeout_ms;
if (ctrlr->is_removed) { if (ctrlr->is_removed) {
return; return;
@ -970,31 +968,45 @@ nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
* wait before proceeding. * wait before proceeding.
*/ */
SPDK_DEBUGLOG(nvme, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); SPDK_DEBUGLOG(nvme, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000; ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000);
shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000); ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000);
SPDK_DEBUGLOG(nvme, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms); SPDK_DEBUGLOG(nvme, "shutdown timeout = %" PRIu32 " ms\n",
ctx->shutdown_timeout_ms);
do { ctx->shutdown_start_tsc = spdk_get_ticks();
if (nvme_ctrlr_get_csts(ctrlr, &csts)) { }
SPDK_ERRLOG("ctrlr %s get_csts() failed\n", ctrlr->trid.traddr);
return;
}
if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { static int
SPDK_DEBUGLOG(nvme, "ctrlr %s shutdown complete in %u milliseconds\n", nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr,
ctrlr->trid.traddr, ms_waited); struct nvme_ctrlr_detach_ctx *ctx)
return; {
} union spdk_nvme_csts_register csts;
uint32_t ms_waited;
nvme_delay(1000); ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz();
ms_waited++;
} while (ms_waited < shutdown_timeout_ms); if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
SPDK_ERRLOG("ctrlr %s get_csts() failed\n", ctrlr->trid.traddr);
return -EIO;
}
if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
SPDK_DEBUGLOG(nvme, "ctrlr %s shutdown complete in %u milliseconds\n",
ctrlr->trid.traddr, ms_waited);
return 0;
}
if (ms_waited < ctx->shutdown_timeout_ms) {
return -EAGAIN;
}
SPDK_ERRLOG("ctrlr %s did not shutdown within %u milliseconds\n", SPDK_ERRLOG("ctrlr %s did not shutdown within %u milliseconds\n",
ctrlr->trid.traddr, shutdown_timeout_ms); ctrlr->trid.traddr, ctx->shutdown_timeout_ms);
if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) { if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
SPDK_ERRLOG("likely due to shutdown handling in the VMWare emulated NVMe SSD\n"); SPDK_ERRLOG("likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
} }
return 0;
} }
static int static int
@ -3260,6 +3272,8 @@ void
nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{ {
struct spdk_nvme_qpair *qpair, *tmp; struct spdk_nvme_qpair *qpair, *tmp;
struct nvme_ctrlr_detach_ctx ctx = {};
int rc;
SPDK_DEBUGLOG(nvme, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr); SPDK_DEBUGLOG(nvme, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr);
@ -3282,7 +3296,14 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
ctrlr->trid.traddr); ctrlr->trid.traddr);
nvme_ctrlr_disable(ctrlr); nvme_ctrlr_disable(ctrlr);
} else { } else {
nvme_ctrlr_shutdown(ctrlr); nvme_ctrlr_shutdown_async(ctrlr, &ctx);
while (1) {
rc = nvme_ctrlr_shutdown_poll_async(ctrlr, &ctx);
if (rc != -EAGAIN) {
break;
}
nvme_delay(1000);
}
} }
nvme_ctrlr_destruct_namespaces(ctrlr); nvme_ctrlr_destruct_namespaces(ctrlr);

View File

@ -843,6 +843,11 @@ struct spdk_nvme_probe_ctx {
TAILQ_HEAD(, spdk_nvme_ctrlr) init_ctrlrs; TAILQ_HEAD(, spdk_nvme_ctrlr) init_ctrlrs;
}; };
struct nvme_ctrlr_detach_ctx {
uint64_t shutdown_start_tsc;
uint32_t shutdown_timeout_ms;
};
struct nvme_driver { struct nvme_driver {
pthread_mutex_t lock; pthread_mutex_t lock;