nvmf/rdma: Don't abort commands with pending RDMA ops until quiesced

Don't abort commands in states indicating that an RDMA operation
is outstanding until the IBV_EVENT_QP_LAST_WQE_REACHED event
indicates that all of their work requests have completed.

Change-Id: Ie2b83604bee142e383ffbcae088f4da0fd0fa658
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/423413
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Seth Howell <seth.howell5141@gmail.com>
Author:    Ben Walker <benjamin.walker@intel.com> 2018-08-24 09:33:35 -07:00
Committed: Jim Harris
Parent:    4eb7c48d75
Commit:    9f6d509bf9
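
Before the diff, a concrete illustration of the policy in the commit message. The helper below is hypothetical — it does not exist in SPDK — but the state names are the real ones used in the diff, and the split matches exactly which queues the "safe" cleanup path drains versus the ones deferred until LAST_WQE_REACHED:

/* Hypothetical helper (not in SPDK): which request states are safe to
 * abort at any time, versus states where an RDMA work request may still
 * be posted to the QP or the request is owned by a lower layer. */
static bool
rdma_request_state_is_safe_to_abort(enum spdk_nvmf_rdma_request_state state)
{
	switch (state) {
	case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
	case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
	case RDMA_REQUEST_STATE_COMPLETING:
		/* An RDMA READ/WRITE or SEND may still complete and touch
		 * this request's buffers. */
		return false;
	case RDMA_REQUEST_STATE_EXECUTING:
		/* The backing block device still owns the request. */
		return false;
	default:
		/* NEW, NEED_BUFFER, DATA_TRANSFER_PENDING, READY_TO_EXECUTE,
		 * EXECUTED, READY_TO_COMPLETE, COMPLETED: no hardware or
		 * lower-layer references remain, so these can drain any time. */
		return true;
	}
}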

@@ -2060,20 +2060,30 @@ error:
	spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
}

+/* Clean up only the states that can be aborted at any time */
static void
-_spdk_nvmf_rdma_qp_cleanup_all_states(struct spdk_nvmf_rdma_qpair *rqpair)
+_spdk_nvmf_rdma_qp_cleanup_safe_states(struct spdk_nvmf_rdma_qpair *rqpair)
{
	struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;

	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_NEW);
-	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING);

	/* First wipe the requests waiting for buffer from the global list */
	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_NEED_BUFFER], link, req_tmp) {
		TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link);
	}

	/* Then drain the requests through the rdma queue */
	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_NEED_BUFFER);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING);
	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_READY_TO_EXECUTE);
	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_EXECUTED);
	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_READY_TO_COMPLETE);
	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETED);
}
+/* This cleans up all memory. It is only safe to use if the rest of the software stack
+ * has been shut down */
+static void
+_spdk_nvmf_rdma_qp_cleanup_all_states(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	_spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair);
+
	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_EXECUTING);
	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
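
For context, every state queue above is emptied through spdk_nvmf_rdma_drain_state_queue(), which this diff does not show. A minimal sketch of its likely shape follows; the state_link field name and the request-processing call are assumptions for illustration, not the exact SPDK implementation:

/* Sketch (assumed shape) of the drain helper used above: abort every
 * request parked in one state by forcing it to COMPLETED and running
 * the state machine so its resources are released. */
static void
drain_state_queue_sketch(struct spdk_nvmf_rdma_qpair *rqpair,
			 enum spdk_nvmf_rdma_request_state state)
{
	struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;

	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[state], state_link, req_tmp) {
		rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
		/* Hypothetical call: advance the request state machine so the
		 * completed request returns to the qpair's free list. */
		nvmf_rdma_request_process_sketch(rqpair, rdma_req);
	}
}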
@@ -2096,7 +2106,34 @@ _spdk_nvmf_rdma_qp_error(void *arg)
	if (spdk_nvmf_qpair_is_admin_queue(&rqpair->qpair)) {
		spdk_nvmf_ctrlr_abort_aer(rqpair->qpair.ctrlr);
	}

-	_spdk_nvmf_rdma_qp_cleanup_all_states(rqpair);
+	_spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair);
+
+	/* Attempt recovery. This will exit without recovering if I/O requests
+	 * are still outstanding */
	spdk_nvmf_rdma_qpair_recover(rqpair);
}
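
The "exit without recovering" behavior mentioned in the comment presumably reduces to an emptiness check on the queues this handler no longer drains. A hedged sketch of such a guard (the function name is hypothetical; the queues are the real ones deferred by this commit):

/* Hypothetical guard: recovery must be a no-op while any request still
 * has an RDMA operation or block-device I/O outstanding. */
static bool
qpair_has_outstanding_io_sketch(struct spdk_nvmf_rdma_qpair *rqpair)
{
	return !TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_EXECUTING]) ||
	       !TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER]) ||
	       !TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST]) ||
	       !TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_COMPLETING]);
}

If a check like this returns true, spdk_nvmf_rdma_qpair_recover() would return early, and the LAST_WQE_REACHED handler added below retries once those queues have drained.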
+static void
+_spdk_nvmf_rdma_qp_last_wqe(void *arg)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair = arg;
+	enum ibv_qp_state state;
+
+	state = rqpair->ibv_attr.qp_state;
+	if (state != IBV_QPS_ERR) {
+		/* Error was already recovered */
+		return;
+	}
+
+	/* Clear out the states that are safe to clear any time, plus the
+	 * RDMA data transfer states. */
+	_spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair);
+
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETING);
+
+	spdk_nvmf_rdma_qpair_recover(rqpair);
+}
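
Note that _spdk_nvmf_rdma_qp_last_wqe reads the cached rqpair->ibv_attr rather than querying the device; the cache is refreshed by spdk_nvmf_rdma_update_ibv_state() in the event handler below. A sketch of what that refresh plausibly does, assuming it wraps the standard ibv_query_qp() verb (the cm_id member access and error fallback are illustrative):

/* Sketch: refresh the cached QP state from the verbs layer. */
static enum ibv_qp_state
update_ibv_state_sketch(struct spdk_nvmf_rdma_qpair *rqpair)
{
	struct ibv_qp_init_attr init_attr;

	/* IBV_QP_STATE asks the driver for the current queue pair state. */
	if (ibv_query_qp(rqpair->cm_id->qp, &rqpair->ibv_attr,
			 IBV_QP_STATE, &init_attr)) {
		SPDK_ERRLOG("Failed to query QP state\n");
		return IBV_QPS_ERR;
	}

	return rqpair->ibv_attr.qp_state;
}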
@@ -2123,12 +2160,13 @@ spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
	switch (event.event_type) {
	case IBV_EVENT_QP_FATAL:
-	case IBV_EVENT_QP_LAST_WQE_REACHED:
		/* This call is thread-safe. Immediately update the IBV state on error notification. */
		spdk_nvmf_rdma_update_ibv_state(rqpair);
		spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_error, rqpair);
		break;
+	case IBV_EVENT_QP_LAST_WQE_REACHED:
+		spdk_nvmf_rdma_update_ibv_state(rqpair);
+		spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_last_wqe, rqpair);
+		break;
	case IBV_EVENT_SQ_DRAINED:
		/* This event occurs frequently in both error and non-error states.
		 * Check if the qpair is in an error state before sending a message.