From ac3a42b15c67dfda9276b366dbf0c4664b8ebb49 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Wed, 30 Jun 2021 20:30:13 +0000 Subject: [PATCH] nvmf: retry connect commands internally when subsys not ready It is better to not fail connect commands when a subsystem is not ready. The host will not be expecting that and will typically treat it as a catastrophic failure (i.e. it won't retry the connect). So instead, when this situation occurs, start a poller for the connect request. We will continue to retry processing it until the subsystem is ready to handle it, or until one second has elapsed, at which point the connect is completed with a CONTROLLER_BUSY status. Fixes issue #1985. Signed-off-by: Jim Harris Change-Id: Id8835df8f0edf1e889fdd7e754e261c2a880cbb6 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8571 Tested-by: SPDK CI Jenkins Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Ziye Yang Reviewed-by: Changpeng Liu Reviewed-by: Shuhei Matsumoto --- lib/nvmf/ctrlr.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/lib/nvmf/ctrlr.c b/lib/nvmf/ctrlr.c index e9cb7c3d0..21fac4f6d 100644 --- a/lib/nvmf/ctrlr.c +++ b/lib/nvmf/ctrlr.c @@ -801,6 +801,25 @@ out: return status; } +static int nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req); + +static int +retry_connect(void *arg) +{ + struct spdk_nvmf_request *req = arg; + struct spdk_nvmf_subsystem_poll_group *sgroup; + int rc; + + sgroup = nvmf_subsystem_pg_from_connect_cmd(req); + sgroup->mgmt_io_outstanding++; + spdk_poller_unregister(&req->poller); + rc = nvmf_ctrlr_cmd_connect(req); + if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) { + _nvmf_request_complete(req); + } + return SPDK_POLLER_BUSY; +} + static int nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req) { @@ -825,10 +844,27 @@ nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req) (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) || (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) || (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) { - 
SPDK_ERRLOG("Subsystem '%s' is not ready\n", subsystem->subnqn); - rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; - rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY; - return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + struct spdk_nvmf_subsystem_poll_group *sgroup; + + if (req->timeout_tsc == 0) { + /* We will only retry the request up to 1 second. */ + req->timeout_tsc = spdk_get_ticks() + spdk_get_ticks_hz(); + } else if (spdk_get_ticks() > req->timeout_tsc) { + SPDK_ERRLOG("Subsystem '%s' was not ready for 1 second\n", subsystem->subnqn); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* Subsystem is not ready to handle a connect. Use a poller to retry it + * again later. Decrement the mgmt_io_outstanding to avoid the + * subsystem waiting for this command to complete before unpausing. + */ + sgroup = nvmf_subsystem_pg_from_connect_cmd(req); + sgroup->mgmt_io_outstanding--; + SPDK_DEBUGLOG(nvmf, "Subsystem '%s' is not ready for connect, retrying...\n", subsystem->subnqn); + req->poller = SPDK_POLLER_REGISTER(retry_connect, req, 100); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; } /* Ensure that hostnqn is null terminated */