From e06896b94cef6c7a27b3380069fd16e09a9323f9 Mon Sep 17 00:00:00 2001 From: Ben Walker Date: Fri, 7 Sep 2018 11:16:32 -0700 Subject: [PATCH] nvmf/rdma: On getting a wc error, force the qpair into the error state This initiates an error recovery instead of a disconnect. The error recovery may result in a disconnect if the qpair is not recoverable. This also resolves an issue where the disconnect may immediately release the resources associated with the rqpair, but upcoming wc entries may still reference it. Change-Id: I9d9e212a83129412e049c91c02725699ce2cac11 Signed-off-by: Ben Walker Reviewed-on: https://review.gerrithub.io/425010 Chandler-Test-Pool: SPDK Automated Test System Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Changpeng Liu Reviewed-by: Shuhei Matsumoto --- lib/nvmf/rdma.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index c59e5b8de..ba2d45be0 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -2588,7 +2588,7 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, rdma_req = get_rdma_req_from_wc(&wc[i]); rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); - /* We're going to kill the connection, so force the request into + /* We're going to attempt an error recovery, so force the request into * the completed state. */ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); spdk_nvmf_rdma_request_process(rtransport, rdma_req); @@ -2598,17 +2598,16 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, rqpair = rdma_recv->qpair; /* Dump this into the incoming queue. This gets cleaned up when - * the queue pair disconnects. */ + * the queue pair disconnects or recovers. */ TAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link); + break; default: SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode); continue; } - /* Begin disconnecting the qpair. This is ok to call multiple times if lots of - * errors occur on the same qpair in the same ibv_poll_cq batch. */ - spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL); - + /* Set the qpair to the error state. This will initiate a recovery. */ + spdk_nvmf_rdma_set_ibv_state(rqpair, IBV_QPS_ERR); continue; }