From 8fc9ac7b0ed3565a5c8fd917ed4ec394231ef4c5 Mon Sep 17 00:00:00 2001
From: JinYu
Date: Thu, 25 Apr 2019 22:03:06 +0800
Subject: [PATCH] nvmf: complete all I/Os before changing sgroup to PAUSED

For an NVMe device, I/Os complete asynchronously, so we must check
for outstanding I/Os before putting the I/O channel when the device
is hot removed. All I/Os must have completed before we change
sgroup->state to PAUSED, so that the subsystem can then be updated
safely.

Fixes #615, #755

Change-Id: I0f727a7bd0734fa9be1193e1f574892ab3e68b55
Signed-off-by: JinYu
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/452038
Tested-by: SPDK CI Jenkins
Reviewed-by: Ben Walker
Reviewed-by: Changpeng Liu
Reviewed-by: Shuhei Matsumoto
---
 lib/nvmf/ctrlr.c                      | 39 +++++++++++++++++++++++++--
 lib/nvmf/nvmf.c                       | 10 ++++++-
 lib/nvmf/nvmf_internal.h              |  7 ++++-
 test/unit/lib/nvmf/ctrlr.c/ctrlr_ut.c |  7 +++++
 4 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/lib/nvmf/ctrlr.c b/lib/nvmf/ctrlr.c
index 027af3c18..67feb0a19 100644
--- a/lib/nvmf/ctrlr.c
+++ b/lib/nvmf/ctrlr.c
@@ -516,6 +516,8 @@ spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
 	}
 
 	if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
+	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
+	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
 		SPDK_ERRLOG("Subsystem '%s' is not ready\n", subnqn);
 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
@@ -1232,6 +1234,7 @@ spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
 {
 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	struct spdk_nvmf_subsystem_poll_group *sgroup;
 
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Async Event Request\n");
 
@@ -1257,6 +1260,10 @@ spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
 	}
 
+	/* AER cmd is an exception; don't count it as an outstanding I/O */
+	sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
+	sgroup->io_outstanding--;
+
 	ctrlr->aer_req = req;
 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
 }
@@ -2396,6 +2403,8 @@ spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
 	}
 
+	/* scan-build falsely reports a null pointer dereference here */
+	assert(group != NULL && group->sgroups != NULL);
 	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
 	if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
 		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Reservation Conflict for nsid %u, opcode %u\n",
@@ -2462,12 +2471,16 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
 {
 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
 	struct spdk_nvmf_qpair *qpair;
+	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
 
 	rsp->sqid = 0;
 	rsp->status.p = 0;
 	rsp->cid = req->cmd->nvme_cmd.cid;
 
 	qpair = req->qpair;
+	if (qpair->ctrlr) {
+		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
+	}
 
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF,
 		      "cpl: cid=%u cdw0=0x%08x rsvd1=%u status=0x%04x\n",
@@ -2479,6 +2492,19 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
 		SPDK_ERRLOG("Transport request completion error!\n");
 	}
 
+	/* AER cmd and fabric connect are exceptions; they are not counted */
+	if (sgroup != NULL && qpair->ctrlr->aer_req != req &&
+	    !(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
+	      req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT)) {
+		assert(sgroup->io_outstanding > 0);
+		sgroup->io_outstanding--;
+		if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
+		    sgroup->io_outstanding == 0) {
+			sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
+			sgroup->cb_fn(sgroup->cb_arg, 0);
+		}
+	}
+
 	spdk_nvmf_qpair_request_cleanup(qpair);
 
 	return 0;
@@ -2533,27 +2559,36 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
 {
 	struct spdk_nvmf_qpair *qpair = req->qpair;
 	spdk_nvmf_request_exec_status status;
+	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
 
 	nvmf_trace_command(req->cmd, spdk_nvmf_qpair_is_admin_queue(qpair));
 
+	if (qpair->ctrlr) {
+		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
+	}
+
 	if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
 		/* Place the request on the outstanding list so we can keep track of it */
 		TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
+		/* Still increment io_outstanding because request_complete decrements it */
+		if (sgroup != NULL) {
+			sgroup->io_outstanding++;
+		}
 		spdk_nvmf_request_complete(req);
 		return;
 	}
 
 	/* Check if the subsystem is paused (if there is a subsystem) */
-	if (qpair->ctrlr) {
-		struct spdk_nvmf_subsystem_poll_group *sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
-
+	if (sgroup != NULL) {
 		if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
 			/* The subsystem is not currently active. Queue this request. */
 			TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
 			return;
 		}
+		sgroup->io_outstanding++;
 	}
 
 	/* Place the request on the outstanding list so we can keep track of it */
diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c
index 288f79709..72e94ffa3 100644
--- a/lib/nvmf/nvmf.c
+++ b/lib/nvmf/nvmf.c
@@ -1142,7 +1142,15 @@ spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
 	}
 
 	assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE);
-	/* TODO: This currently does not quiesce I/O */
+	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
+
+	if (sgroup->io_outstanding > 0) {
+		sgroup->cb_fn = cb_fn;
+		sgroup->cb_arg = cb_arg;
+		return;
+	}
+
+	assert(sgroup->io_outstanding == 0);
 	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
 fini:
 	if (cb_fn) {
diff --git a/lib/nvmf/nvmf_internal.h b/lib/nvmf/nvmf_internal.h
index 5833f1735..ede2976d7 100644
--- a/lib/nvmf/nvmf_internal.h
+++ b/lib/nvmf/nvmf_internal.h
@@ -127,11 +127,17 @@ struct spdk_nvmf_subsystem_pg_ns_info {
 	struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
 };
 
+typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
+
 struct spdk_nvmf_subsystem_poll_group {
 	/* Array of namespace information for each namespace indexed by nsid - 1 */
 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
 	uint32_t num_ns;
 
+	uint64_t io_outstanding;
+	spdk_nvmf_poll_group_mod_done cb_fn;
+	void *cb_arg;
+
 	enum spdk_nvmf_subsystem_state state;
 
 	TAILQ_HEAD(, spdk_nvmf_request) queued;
@@ -319,7 +325,6 @@ struct spdk_nvmf_subsystem {
 	TAILQ_ENTRY(spdk_nvmf_subsystem) entries;
 };
 
-typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
 
 struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt,
 		enum spdk_nvme_transport_type);
diff --git a/test/unit/lib/nvmf/ctrlr.c/ctrlr_ut.c b/test/unit/lib/nvmf/ctrlr.c/ctrlr_ut.c
index 4b02f3c0d..c955c98b3 100644
--- a/test/unit/lib/nvmf/ctrlr.c/ctrlr_ut.c
+++ b/test/unit/lib/nvmf/ctrlr.c/ctrlr_ut.c
@@ -279,6 +279,7 @@ test_connect(void)
 {
 	struct spdk_nvmf_fabric_connect_data connect_data;
 	struct spdk_nvmf_poll_group group;
+	struct spdk_nvmf_subsystem_poll_group *sgroups;
 	struct spdk_nvmf_transport transport;
 	struct spdk_nvmf_subsystem subsystem;
 	struct spdk_nvmf_request req;
@@ -339,6 +340,10 @@ test_connect(void)
 	subsystem.state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
 	snprintf(subsystem.subnqn, sizeof(subsystem.subnqn), "%s", subnqn);
 
+	sgroups = calloc(subsystem.id + 1, sizeof(struct spdk_nvmf_subsystem_poll_group));
+	sgroups[subsystem.id].io_outstanding = 5;
+	group.sgroups = sgroups;
+
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.connect_cmd.opcode = SPDK_NVME_OPC_FABRIC;
 	cmd.connect_cmd.cid = 1;
@@ -643,6 +648,7 @@ test_connect(void)
 	MOCK_CLEAR(spdk_nvmf_poll_group_create);
 
 	spdk_bit_array_free(&ctrlr.qpair_mask);
+	free(sgroups);
 }
 
 static void
@@ -1108,6 +1114,7 @@ test_reservation_notification_log_page(void)
 	ctrlr.aer_req = &req;
 	req.qpair = &qpair;
 	TAILQ_INIT(&qpair.outstanding);
+	qpair.ctrlr = NULL;
 	qpair.state = SPDK_NVMF_QPAIR_ACTIVE;
 	TAILQ_INSERT_TAIL(&qpair.outstanding, &req, link);
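
The heart of this change is a small quiesce handshake: spdk_nvmf_request_exec()
increments a per-subsystem, per-poll-group io_outstanding counter,
spdk_nvmf_request_complete() decrements it, and
spdk_nvmf_poll_group_pause_subsystem() moves the group to PAUSING and defers its
callback until the counter drains to zero. Below is a minimal, self-contained
sketch of that handshake in isolation; the names (struct poll_group,
pg_submit_io, pg_complete_io, pg_pause, pause_done) are illustrative stand-ins,
not SPDK API.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum pg_state { PG_ACTIVE, PG_PAUSING, PG_PAUSED };

typedef void (*pause_done_cb)(void *cb_arg, int status);

struct poll_group {
	enum pg_state state;
	uint64_t io_outstanding;
	pause_done_cb cb_fn;
	void *cb_arg;
};

/* Submit path: count the I/O only while the group is ACTIVE.
 * (The patch queues requests instead when the group is PAUSING/PAUSED.) */
static int
pg_submit_io(struct poll_group *pg)
{
	if (pg->state != PG_ACTIVE) {
		return -1; /* caller would queue the request */
	}
	pg->io_outstanding++;
	return 0;
}

/* Completion path: decrement the counter and finish a pending pause when
 * it drains to zero, mirroring spdk_nvmf_request_complete(). */
static void
pg_complete_io(struct poll_group *pg)
{
	assert(pg->io_outstanding > 0);
	pg->io_outstanding--;
	if (pg->state == PG_PAUSING && pg->io_outstanding == 0) {
		pg->state = PG_PAUSED;
		pg->cb_fn(pg->cb_arg, 0);
	}
}

/* Pause path: flip to PAUSING; finish immediately only if no I/O is in
 * flight, mirroring spdk_nvmf_poll_group_pause_subsystem(). */
static void
pg_pause(struct poll_group *pg, pause_done_cb cb_fn, void *cb_arg)
{
	pg->state = PG_PAUSING;
	if (pg->io_outstanding > 0) {
		pg->cb_fn = cb_fn;
		pg->cb_arg = cb_arg;
		return;
	}
	pg->state = PG_PAUSED;
	cb_fn(cb_arg, 0);
}

static void
pause_done(void *cb_arg, int status)
{
	printf("paused (status=%d); safe to update subsystem %s\n",
	       status, (const char *)cb_arg);
}

int
main(void)
{
	struct poll_group pg = { .state = PG_ACTIVE };

	pg_submit_io(&pg);
	pg_submit_io(&pg);
	/* Two I/Os in flight, so the pause is deferred... */
	pg_pause(&pg, pause_done, (void *)"nqn.2019-04.io.example:subsys");
	pg_complete_io(&pg);
	pg_complete_io(&pg);	/* ...and pause_done() fires here */
	return 0;
}

As in the patch, the callback fires exactly once: synchronously from pg_pause()
when nothing is in flight, otherwise from the completion of the last
outstanding I/O. This is also why the patch exempts the AER command from the
count; an AER can stay outstanding indefinitely and would otherwise keep a
PAUSING group from ever draining.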