nvmf: complete all I/Os before changing sgroup to PAUSED

For NVMe devices, I/Os complete asynchronously, so we need to
check for outstanding I/Os before putting the I/O channel when a
device is hot removed. We must be sure that all I/Os have
completed before changing sgroup->state to PAUSED, so that the
subsystem can then be updated safely.

Fix #615 #755

Change-Id: I0f727a7bd0734fa9be1193e1f574892ab3e68b55
Signed-off-by: JinYu <jin.yu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/452038
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
commit 8fc9ac7b0e (parent 17e0283db1)
Author: JinYu, 2019-04-25 22:03:06 +08:00
Committed-by: Changpeng Liu
4 changed files with 59 additions and 4 deletions
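
The fix is a quiesce-by-reference-count pattern on each subsystem poll group. The sketch below distills the logic that the hunks implement; it reuses the io_outstanding/cb_fn field names and the PAUSING/PAUSED states from the diff, but it is a simplified illustration, not the SPDK code itself.

/*
 * Distilled model of the mechanism this commit adds: each subsystem
 * poll group counts its in-flight requests, and the PAUSING -> PAUSED
 * transition is deferred until that count drains to zero.
 */
#include <assert.h>
#include <stdint.h>

enum pg_state { PG_ACTIVE, PG_PAUSING, PG_PAUSED };

struct poll_group_sketch {
	uint64_t io_outstanding;                 /* in-flight request count */
	enum pg_state state;
	void (*cb_fn)(void *cb_arg, int status); /* deferred pause-done callback */
	void *cb_arg;
};

/* Submit path: count the request before dispatching it. */
static void
submit(struct poll_group_sketch *pg)
{
	pg->io_outstanding++;
	/* ... hand the request to the transport/bdev layer ... */
}

/* Completion path: uncount, and finish a pending pause on the last I/O. */
static void
complete(struct poll_group_sketch *pg)
{
	assert(pg->io_outstanding > 0);
	pg->io_outstanding--;
	if (pg->state == PG_PAUSING && pg->io_outstanding == 0) {
		pg->state = PG_PAUSED;
		pg->cb_fn(pg->cb_arg, 0);
	}
}

/* Pause path: with I/Os still in flight, stash the callback and defer;
 * complete() fires it once the counter reaches zero. */
static void
pause_group(struct poll_group_sketch *pg,
	    void (*cb_fn)(void *, int), void *cb_arg)
{
	pg->state = PG_PAUSING;
	if (pg->io_outstanding > 0) {
		pg->cb_fn = cb_fn;
		pg->cb_arg = cb_arg;
		return;
	}
	pg->state = PG_PAUSED;
	cb_fn(cb_arg, 0);
}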

lib/nvmf/ctrlr.c

@@ -516,6 +516,8 @@ spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
}
if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
SPDK_ERRLOG("Subsystem '%s' is not ready\n", subnqn);
rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
@@ -1232,6 +1234,7 @@ spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
struct spdk_nvmf_subsystem_poll_group *sgroup;
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Async Event Request\n");
@@ -1257,6 +1260,10 @@ spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
/* AER cmd is an exception */
sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
sgroup->io_outstanding--;
ctrlr->aer_req = req;
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
@@ -2396,6 +2403,8 @@ spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
/* scan-build falsely reporting dereference of null pointer */
assert(group != NULL && group->sgroups != NULL);
ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Reservation Conflict for nsid %u, opcode %u\n",
@@ -2462,12 +2471,16 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
{
struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
struct spdk_nvmf_qpair *qpair;
struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
rsp->sqid = 0;
rsp->status.p = 0;
rsp->cid = req->cmd->nvme_cmd.cid;
qpair = req->qpair;
if (qpair->ctrlr) {
sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
}
SPDK_DEBUGLOG(SPDK_LOG_NVMF,
"cpl: cid=%u cdw0=0x%08x rsvd1=%u status=0x%04x\n",
@@ -2479,6 +2492,19 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
SPDK_ERRLOG("Transport request completion error!\n");
}
/* AER cmd and fabric connect are exceptions */
if (sgroup != NULL && qpair->ctrlr->aer_req != req &&
!(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT)) {
assert(sgroup->io_outstanding > 0);
sgroup->io_outstanding--;
if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
sgroup->io_outstanding == 0) {
sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
sgroup->cb_fn(sgroup->cb_arg, 0);
}
}
spdk_nvmf_qpair_request_cleanup(qpair);
return 0;
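
Note the two exceptions above, which mirror the submit side: an AER may legitimately stay outstanding for the lifetime of the controller, so counting it would keep io_outstanding from ever reaching zero while pausing (hence the early decrement in spdk_nvmf_ctrlr_async_event_request() above), and a fabric CONNECT completes before the qpair has a controller, so it was never counted on submit. A hypothetical predicate making the rule explicit (the helper is not in the patch; it assumes the types from lib/nvmf/nvmf_internal.h, and only the SPDK constants are real):

/* Hypothetical helper: does this request participate in the poll
 * group's quiesce accounting? */
static bool
request_counts_for_quiesce(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_qpair *qpair = req->qpair;

	if (qpair->ctrlr == NULL) {
		/* No controller yet means no subsystem poll group to charge. */
		return false;
	}
	if (qpair->ctrlr->aer_req == req) {
		/* The parked AER was already uncounted when it was accepted. */
		return false;
	}
	if (req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
	    req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
		/* CONNECT ran before the qpair had a controller, so it was
		 * never counted on the submit side. */
		return false;
	}
	return true;
}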
@@ -2533,27 +2559,36 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_qpair *qpair = req->qpair;
spdk_nvmf_request_exec_status status;
struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
nvmf_trace_command(req->cmd, spdk_nvmf_qpair_is_admin_queue(qpair));
if (qpair->ctrlr) {
sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
}
if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
/* Place the request on the outstanding list so we can keep track of it */
TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
/* Still increment io_outstanding because request_complete decrements it */
if (sgroup != NULL) {
sgroup->io_outstanding++;
}
spdk_nvmf_request_complete(req);
return;
}
/* Check if the subsystem is paused (if there is a subsystem) */
if (qpair->ctrlr) {
struct spdk_nvmf_subsystem_poll_group *sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
if (sgroup != NULL) {
if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
/* The subsystem is not currently active. Queue this request. */
TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
return;
}
sgroup->io_outstanding++;
}
/* Place the request on the outstanding list so we can keep track of it */
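
Condensed, the new submit-side flow looks like this (a non-literal restatement of the hunk above): a request that fails immediately because the qpair is inactive is still counted, so the decrement in spdk_nvmf_request_complete() stays balanced and io_outstanding cannot underflow, while a request arriving during a pause is parked and only counted when it is re-executed after resume.

if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
	/* Fail it, but count it first: spdk_nvmf_request_complete()
	 * decrements for every counted request. */
	if (sgroup != NULL) {
		sgroup->io_outstanding++;
	}
	spdk_nvmf_request_complete(req);
} else if (sgroup != NULL && sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
	/* Pausing or paused: park the request; it is re-executed (and
	 * then counted) when the subsystem resumes. */
	TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
} else {
	if (sgroup != NULL) {
		sgroup->io_outstanding++;
	}
	/* ... dispatch to the transport/bdev layer ... */
}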

lib/nvmf/nvmf.c

@@ -1142,7 +1142,15 @@ spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
}
assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE);
/* TODO: This currently does not quiesce I/O */
sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
if (sgroup->io_outstanding > 0) {
sgroup->cb_fn = cb_fn;
sgroup->cb_arg = cb_arg;
return;
}
assert(sgroup->io_outstanding == 0);
sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
fini:
if (cb_fn) {
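
For context, this per-poll-group pause is what spdk_nvmf_subsystem_pause() fans out across poll groups, and the stashed cb_fn is how draining I/O now delays its completion. A caller sketch, assuming the public spdk_nvmf_subsystem_pause()/spdk_nvmf_subsystem_resume() API from include/spdk/nvmf.h of this era (the hot-remove handler itself is invented for illustration):

/* Hypothetical hot-remove handler: pause, modify, resume. With this
 * commit, the pause callback runs only after every poll group has
 * drained to io_outstanding == 0 and reached PAUSED. */
static void
hot_remove_pause_done(struct spdk_nvmf_subsystem *subsystem,
		      void *cb_arg, int status)
{
	uint32_t nsid = (uint32_t)(uintptr_t)cb_arg;

	if (status != 0) {
		SPDK_ERRLOG("Failed to pause subsystem\n");
		return;
	}
	/* All I/Os have completed, so the namespace can be changed safely. */
	spdk_nvmf_subsystem_remove_ns(subsystem, nsid);
	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
}

static void
on_hot_remove(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	spdk_nvmf_subsystem_pause(subsystem, hot_remove_pause_done,
				  (void *)(uintptr_t)nsid);
}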

lib/nvmf/nvmf_internal.h

@@ -127,11 +127,17 @@ struct spdk_nvmf_subsystem_pg_ns_info {
struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};
typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
struct spdk_nvmf_subsystem_poll_group {
/* Array of namespace information for each namespace indexed by nsid - 1 */
struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
uint32_t num_ns;
uint64_t io_outstanding;
spdk_nvmf_poll_group_mod_done cb_fn;
void *cb_arg;
enum spdk_nvmf_subsystem_state state;
TAILQ_HEAD(, spdk_nvmf_request) queued;
@@ -319,7 +325,6 @@ struct spdk_nvmf_subsystem {
TAILQ_ENTRY(spdk_nvmf_subsystem) entries;
};
typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt,
enum spdk_nvme_transport_type);

test/unit/lib/nvmf/ctrlr.c/ctrlr_ut.c

@@ -279,6 +279,7 @@ test_connect(void)
{
struct spdk_nvmf_fabric_connect_data connect_data;
struct spdk_nvmf_poll_group group;
struct spdk_nvmf_subsystem_poll_group *sgroups;
struct spdk_nvmf_transport transport;
struct spdk_nvmf_subsystem subsystem;
struct spdk_nvmf_request req;
@@ -339,6 +340,10 @@ test_connect(void)
subsystem.state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
snprintf(subsystem.subnqn, sizeof(subsystem.subnqn), "%s", subnqn);
sgroups = calloc(subsystem.id + 1, sizeof(struct spdk_nvmf_subsystem_poll_group));
sgroups[subsystem.id].io_outstanding = 5;
group.sgroups = sgroups;
memset(&cmd, 0, sizeof(cmd));
cmd.connect_cmd.opcode = SPDK_NVME_OPC_FABRIC;
cmd.connect_cmd.cid = 1;
@@ -643,6 +648,7 @@ test_connect(void)
MOCK_CLEAR(spdk_nvmf_poll_group_create);
spdk_bit_array_free(&ctrlr.qpair_mask);
free(sgroups);
}
static void
@@ -1108,6 +1114,7 @@ test_reservation_notification_log_page(void)
ctrlr.aer_req = &req;
req.qpair = &qpair;
TAILQ_INIT(&qpair.outstanding);
qpair.ctrlr = NULL;
qpair.state = SPDK_NVMF_QPAIR_ACTIVE;
TAILQ_INSERT_TAIL(&qpair.outstanding, &req, link);