nvmf/rdma: Treat nvmf qpair state as read-only

Decide which action to take based on a combination of the
nvmf qpair state and the RDMA qpair state.
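
The decision in question is a small matrix over two inputs: the generic nvmf qpair state, which the transport now treats as read-only, and the ibv qp state cached on the RDMA qpair. Below is a minimal illustrative sketch of that matrix in C; the struct and enum are simplified stand-ins for SPDK's real types, not its API:

    #include <stdbool.h>
    #include <infiniband/verbs.h>

    /* Simplified stand-ins for SPDK's qpair state; only the two fields
     * the decision matrix actually consults. */
    enum nvmf_state { NVMF_QPAIR_ACTIVE, NVMF_QPAIR_NOT_ACTIVE };

    struct qpair_view {
        enum nvmf_state   nvmf_state; /* owned by the generic nvmf layer */
        enum ibv_qp_state ibv_state;  /* cached copy of the RDMA QP state */
    };

    static bool
    connection_alive(const struct qpair_view *q)
    {
        /* A completed request may still be transferred to the host only
         * while the RDMA QP has not entered the error state. */
        return q->ibv_state != IBV_QPS_ERR;
    }

    static bool
    needs_recovery(const struct qpair_view *q)
    {
        /* Recovery starts when the nvmf layer still believes the qpair is
         * active but the RDMA QP has already gone to error. */
        return q->nvmf_state == NVMF_QPAIR_ACTIVE &&
               q->ibv_state == IBV_QPS_ERR;
    }

The hunks below encode exactly these two predicates: requests are executed only while the connection is alive, and the drain/recovery path runs once the final request completes on a dead QP.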

Change-Id: I338ace9dd66dd8dcf81aa30e51758aa81768d7f4
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/421162
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Seth Howell <seth.howell5141@gmail.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Author: Ben Walker
Date: 2018-08-02 09:56:23 -07:00
Committer: Jim Harris
Parent: a1acca9274
Commit: 531fd76d10

@@ -342,7 +342,7 @@ static const char *str_ibv_qp_state[] = {
 };

 static enum ibv_qp_state
-spdk_nvmf_rdma_get_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) {
+spdk_nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) {
	int rc;

	/* All the attributes needed for recovery */
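
The get-to-update rename in this hunk is the crux of the change: later hunks read rqpair->ibv_attr.qp_state directly instead of re-querying the device, so the helper must cache what it queries. A hypothetical sketch of that query-and-cache pattern follows, with a simplified stand-in struct; the real helper queries all attributes needed for recovery, and its error handling may differ:

    #include <infiniband/verbs.h>
    #include <rdma/rdma_cma.h>

    /* Simplified stand-in for the relevant fields of
     * struct spdk_nvmf_rdma_qpair. */
    struct rqpair_view {
        struct rdma_cm_id       *cm_id;
        struct ibv_qp_attr       ibv_attr;      /* cached QP attributes */
        struct ibv_qp_init_attr  ibv_init_attr;
    };

    static enum ibv_qp_state
    update_ibv_state(struct rqpair_view *rqpair)
    {
        /* Query the QP and cache the answer on the qpair, so later code
         * can read rqpair->ibv_attr.qp_state without another verbs
         * round trip. */
        if (ibv_query_qp(rqpair->cm_id->qp, &rqpair->ibv_attr,
                         IBV_QP_STATE, &rqpair->ibv_init_attr) != 0) {
            /* This sketch treats a failed query as a dead QP. */
            return IBV_QPS_ERR;
        }
        return rqpair->ibv_attr.qp_state;
    }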
@@ -432,7 +432,7 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
		return rc;
	}

-	state = spdk_nvmf_rdma_get_ibv_state(rqpair);
+	state = spdk_nvmf_rdma_update_ibv_state(rqpair);

	if (state != new_state) {
		SPDK_ERRLOG("QP#%d: expected state: %s, actual state: %s\n",
@@ -909,6 +909,7 @@ static int
 nvmf_rdma_disconnect(struct rdma_cm_event *evt)
 {
	struct spdk_nvmf_qpair		*qpair;
+	struct spdk_nvmf_rdma_qpair	*rqpair;

	if (evt->id == NULL) {
		SPDK_ERRLOG("disconnect request: missing cm_id\n");
@@ -923,6 +924,9 @@ nvmf_rdma_disconnect(struct rdma_cm_event *evt)
	/* ack the disconnect event before rdma_destroy_id */
	rdma_ack_cm_event(evt);

+	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+	spdk_nvmf_rdma_update_ibv_state(rqpair);
+
	spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);

	return 0;
@@ -1227,11 +1231,16 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
			TAILQ_REMOVE(&rqpair->incoming_queue, rdma_recv, link);

-			if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR ||
-			    rqpair->qpair.state == SPDK_NVMF_QPAIR_DEACTIVATING) {
+			if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
				spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
				break;
			}

+			if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+				spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+				break;
+			}
+
			/* The next state transition depends on the data transfer needs of this request. */
			rdma_req->req.xfer = spdk_nvmf_rdma_request_get_xfer(rdma_req);
@@ -1883,7 +1892,7 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
	/* Do not process newly received commands if qp is in ERROR state,
	 * wait till the recovery is complete.
	 */
-	if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR) {
+	if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
		return;
	}
@@ -1937,12 +1946,7 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
		return;
	}

-	if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ERROR) {
-		/* Do not start recovery if qp is not in error state. */
-		return;
-	}
-
-	state = spdk_nvmf_rdma_get_ibv_state(rqpair);
+	state = spdk_nvmf_rdma_update_ibv_state(rqpair);
	next_state = state;

	SPDK_NOTICELOG("IBV QP#%u is in state: %s\n",
@@ -1957,7 +1961,6 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
		return;
	}

-	rqpair->qpair.state = SPDK_NVMF_QPAIR_INACTIVE;
	recovered = 0;
	while (!recovered) {
@@ -1993,7 +1996,6 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
		state = next_state;
	}

-	rqpair->qpair.state = SPDK_NVMF_QPAIR_ACTIVE;
	rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
			struct spdk_nvmf_rdma_transport, transport);
@@ -2002,9 +2004,6 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
	return;

 error:
	SPDK_ERRLOG("IBV qp#%u recovery failed\n", rqpair->qpair.qid);
-	/* Put NVMf qpair back into error state so recovery
-	 * will trigger disconnect */
-	rqpair->qpair.state = SPDK_NVMF_QPAIR_ERROR;
	spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
 }
@@ -2014,7 +2013,7 @@ _spdk_nvmf_rdma_qp_error(void *arg)
	struct spdk_nvmf_rdma_qpair	*rqpair = arg;
	struct spdk_nvmf_rdma_request	*rdma_req, *req_tmp;

-	rqpair->qpair.state = SPDK_NVMF_QPAIR_ERROR;
+	spdk_nvmf_rdma_update_ibv_state(rqpair);

	if (spdk_nvmf_qpair_is_admin_queue(&rqpair->qpair)) {
		spdk_nvmf_ctrlr_abort_aer(rqpair->qpair.ctrlr);
@@ -2056,6 +2055,9 @@ spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
	rqpair = event.element.qp->qp_context;

+	/* This call is thread-safe. Immediately update the IBV state on error notification. */
+	spdk_nvmf_rdma_update_ibv_state(rqpair);
+
	switch (event.event_type) {
	case IBV_EVENT_QP_FATAL:
	case IBV_EVENT_QP_LAST_WQE_REACHED:
@@ -2279,7 +2281,7 @@ spdk_nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
		return -1;
	}

-	spdk_nvmf_rdma_get_ibv_state(rqpair);
+	spdk_nvmf_rdma_update_ibv_state(rqpair);

	return 0;
 }
@@ -2359,30 +2361,25 @@ spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
	struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair,
			struct spdk_nvmf_rdma_qpair, qpair);

-	switch (rqpair->qpair.state) {
-	case SPDK_NVMF_QPAIR_ERROR:
-		/* Mark request as COMPLETED for ERROR state
-		 * so RDMA transfer is not kicked off
-		 */
+	if (rqpair->ibv_attr.qp_state != IBV_QPS_ERR) {
+		/* The connection is alive, so process the request as normal */
+		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTED);
+	} else {
+		/* The connection is dead. Move the request directly to the completed state. */
		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
-		spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+	}

-		/* QP in ERROR state is awaiting for all requests to be
-		 * completed by bdev layer
-		 */
+	spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+
+	if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE && rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+		/* If the NVMe-oF layer thinks the connection is active, but the RDMA layer thinks
+		 * the connection is dead, check if this is the final I/O to complete and perform
+		 * error recovery. */
		if (spdk_nvmf_rdma_qpair_is_idle(&rqpair->qpair)) {
			spdk_nvmf_rdma_qp_drained(rqpair);
		}
-		break;
-	case SPDK_NVMF_QPAIR_INACTIVE:
-	case SPDK_NVMF_QPAIR_ACTIVATING:
-	case SPDK_NVMF_QPAIR_ACTIVE:
-	case SPDK_NVMF_QPAIR_DEACTIVATING:
-	default:
-		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTED);
-		spdk_nvmf_rdma_request_process(rtransport, rdma_req);
-		break;
	}

	return 0;
 }