nvmf: retry connect commands internally when subsys not ready
It is better to not fail connect commands when a subsystem is not ready. The host will not be expecting that and will typically treat it as a catastrophic failure (i.e. it won't retry the connect). So instead when this situation occurs, start a poller for the connect request. We will continue to retry processing it until the subsystem is ready to handle it. Fixes issue #1985. Signed-off-by: Jim Harris <james.r.harris@intel.com> Change-Id: Id8835df8f0edf1e889fdd7e754e261c2a880cbb6 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8571 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot Reviewed-by: Ziye Yang <ziye.yang@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
parent
65ef1f32a6
commit
ac3a42b15c
@ -801,6 +801,25 @@ out:
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req);
|
||||||
|
|
||||||
|
static int
|
||||||
|
retry_connect(void *arg)
|
||||||
|
{
|
||||||
|
struct spdk_nvmf_request *req = arg;
|
||||||
|
struct spdk_nvmf_subsystem_poll_group *sgroup;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
|
||||||
|
sgroup->mgmt_io_outstanding++;
|
||||||
|
spdk_poller_unregister(&req->poller);
|
||||||
|
rc = nvmf_ctrlr_cmd_connect(req);
|
||||||
|
if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
|
||||||
|
_nvmf_request_complete(req);
|
||||||
|
}
|
||||||
|
return SPDK_POLLER_BUSY;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
|
nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
|
||||||
{
|
{
|
||||||
@ -825,12 +844,29 @@ nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
|
|||||||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
|
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
|
||||||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
|
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
|
||||||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
|
(subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
|
||||||
SPDK_ERRLOG("Subsystem '%s' is not ready\n", subsystem->subnqn);
|
struct spdk_nvmf_subsystem_poll_group *sgroup;
|
||||||
|
|
||||||
|
if (req->timeout_tsc == 0) {
|
||||||
|
/* We will only retry the request up to 1 second. */
|
||||||
|
req->timeout_tsc = spdk_get_ticks() + spdk_get_ticks_hz();
|
||||||
|
} else if (spdk_get_ticks() > req->timeout_tsc) {
|
||||||
|
SPDK_ERRLOG("Subsystem '%s' was not ready for 1 second\n", subsystem->subnqn);
|
||||||
rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
||||||
rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
|
rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
|
||||||
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
|
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Subsystem is not ready to handle a connect. Use a poller to retry it
|
||||||
|
* again later. Decrement the mgmt_io_outstanding to avoid the
|
||||||
|
* subsystem waiting for this command to complete before unpausing.
|
||||||
|
*/
|
||||||
|
sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
|
||||||
|
sgroup->mgmt_io_outstanding--;
|
||||||
|
SPDK_DEBUGLOG(nvmf, "Subsystem '%s' is not ready for connect, retrying...\n", subsystem->subnqn);
|
||||||
|
req->poller = SPDK_POLLER_REGISTER(retry_connect, req, 100);
|
||||||
|
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
|
||||||
|
}
|
||||||
|
|
||||||
/* Ensure that hostnqn is null terminated */
|
/* Ensure that hostnqn is null terminated */
|
||||||
if (!memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
|
if (!memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
|
||||||
SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n");
|
SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n");
|
||||||
|
Loading…
Reference in New Issue
Block a user