nvmf/rdma: Treat nvmf qpair state as read-only
Decide which action to take based on a combination of the nvmf
qpair state and the RDMA qpair state.

Change-Id: I338ace9dd66dd8dcf81aa30e51758aa81768d7f4
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/421162
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Seth Howell <seth.howell5141@gmail.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
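For illustration, here is a standalone sketch of the decision pattern this commit adopts: combine the NVMe-oF qpair state with the (cached) IBV QP state to pick an action. The enums and function are stand-ins, not SPDK or libibverbs definitions; the real logic appears in the spdk_nvmf_rdma_request_complete() hunk below.

#include <stdio.h>

/* Stand-ins for the two state machines the commit combines; these
 * mirror, but are not, the SPDK and libibverbs definitions. */
enum nvmf_qpair_state { QPAIR_INACTIVE, QPAIR_ACTIVE, QPAIR_DEACTIVATING, QPAIR_ERROR };
enum sim_ibv_qp_state { QPS_RTS, QPS_ERR };

/* Pick the next request state from both qpair states, mirroring the
 * post-commit spdk_nvmf_rdma_request_complete() logic below. */
static const char *
next_action(enum nvmf_qpair_state nvmf, enum sim_ibv_qp_state ibv)
{
	if (ibv != QPS_ERR) {
		return "EXECUTED";          /* RDMA connection alive: normal path */
	}
	if (nvmf == QPAIR_ACTIVE) {
		return "COMPLETED, then drain if idle"; /* RDMA dead, NVMf active: recover */
	}
	return "COMPLETED";                 /* already tearing down */
}

int
main(void)
{
	printf("%s\n", next_action(QPAIR_ACTIVE, QPS_ERR));
	return 0;
}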
parent a1acca9274
commit 531fd76d10
@@ -342,7 +342,7 @@ static const char *str_ibv_qp_state[] = {
 };
 
 static enum ibv_qp_state
-spdk_nvmf_rdma_get_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) {
+spdk_nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) {
 	int rc;
 
 	/* All the attributes needed for recovery */
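A hedged sketch of what the renamed helper plausibly does: query the QP through ibv_query_qp() and cache the attributes on the qpair, so the rest of the transport can read rqpair->ibv_attr.qp_state without another query. The helper name, parameters, and the reduced attr_mask here are assumptions; the diff only shows the signature rename and the "attributes needed for recovery" comment.

#include <infiniband/verbs.h>

/* Hypothetical reduction of the renamed helper: query the current QP
 * attributes and cache them so callers can read cached_attr->qp_state
 * later. The real SPDK function requests all the attributes needed
 * for recovery and differs in details. */
static enum ibv_qp_state
update_ibv_state_sketch(struct ibv_qp *qp, struct ibv_qp_attr *cached_attr)
{
	struct ibv_qp_init_attr init_attr;
	int rc;

	/* IBV_QP_STATE alone is enough for this sketch. */
	rc = ibv_query_qp(qp, cached_attr, IBV_QP_STATE, &init_attr);
	if (rc) {
		return IBV_QPS_ERR;
	}
	return cached_attr->qp_state;
}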
@@ -432,7 +432,7 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
 		return rc;
 	}
 
-	state = spdk_nvmf_rdma_get_ibv_state(rqpair);
+	state = spdk_nvmf_rdma_update_ibv_state(rqpair);
 
 	if (state != new_state) {
 		SPDK_ERRLOG("QP#%d: expected state: %s, actual state: %s\n",
@@ -909,6 +909,7 @@ static int
 nvmf_rdma_disconnect(struct rdma_cm_event *evt)
 {
 	struct spdk_nvmf_qpair		*qpair;
+	struct spdk_nvmf_rdma_qpair	*rqpair;
 
 	if (evt->id == NULL) {
 		SPDK_ERRLOG("disconnect request: missing cm_id\n");
@@ -923,6 +924,9 @@ nvmf_rdma_disconnect(struct rdma_cm_event *evt)
 	/* ack the disconnect event before rdma_destroy_id */
 	rdma_ack_cm_event(evt);
 
+	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+	spdk_nvmf_rdma_update_ibv_state(rqpair);
+
 	spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
 
 	return 0;
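For context, the disconnect path in this hunk is driven by librdmacm's connection-manager event channel. A generic sketch of that event-loop pattern follows; this is not SPDK's actual loop, just the standard librdmacm usage it sits inside:

#include <rdma/rdma_cma.h>
#include <stdio.h>

/* Generic librdmacm event-loop shape behind nvmf_rdma_disconnect():
 * fetch an event, dispatch on its type, and ack it. SPDK's real loop
 * handles many more event types. */
static int
poll_cm_events_once(struct rdma_event_channel *channel)
{
	struct rdma_cm_event *event;

	if (rdma_get_cm_event(channel, &event)) {
		return -1;  /* no event / error */
	}

	switch (event->event_type) {
	case RDMA_CM_EVENT_DISCONNECTED:
		/* As in the hunk above: handle the disconnect, and ack the
		 * event before any rdma_destroy_id() on event->id. */
		printf("disconnect on cm_id %p\n", (void *)event->id);
		break;
	default:
		break;
	}

	rdma_ack_cm_event(event);
	return 0;
}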
@@ -1227,11 +1231,16 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 
 			TAILQ_REMOVE(&rqpair->incoming_queue, rdma_recv, link);
 
-			if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR ||
-			    rqpair->qpair.state == SPDK_NVMF_QPAIR_DEACTIVATING) {
+			if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
 				spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
 				break;
 			}
 
+			if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+				spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+				break;
+			}
+
 			/* The next state transition depends on the data transfer needs of this request. */
 			rdma_req->req.xfer = spdk_nvmf_rdma_request_get_xfer(rdma_req);
 
@@ -1883,7 +1892,7 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
 	/* Do not process newly received commands if qp is in ERROR state,
 	 * wait till the recovery is complete.
 	 */
-	if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR) {
+	if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
 		return;
 	}
 
@@ -1937,12 +1946,7 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
 		return;
 	}
 
-	if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ERROR) {
-		/* Do not start recovery if qp is not in error state. */
-		return;
-	}
-
-	state = spdk_nvmf_rdma_get_ibv_state(rqpair);
+	state = spdk_nvmf_rdma_update_ibv_state(rqpair);
 	next_state = state;
 
 	SPDK_NOTICELOG("IBV QP#%u is in state: %s\n",
@@ -1957,7 +1961,6 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
 		return;
 	}
 
-	rqpair->qpair.state = SPDK_NVMF_QPAIR_INACTIVE;
 	recovered = 0;
 
 	while (!recovered) {
@@ -1993,7 +1996,6 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
 
 		state = next_state;
 	}
-	rqpair->qpair.state = SPDK_NVMF_QPAIR_ACTIVE;
 
 	rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
 				      struct spdk_nvmf_rdma_transport, transport);
@@ -2002,9 +2004,6 @@ spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
 	return;
 error:
 	SPDK_ERRLOG("IBV qp#%u recovery failed\n", rqpair->qpair.qid);
-	/* Put NVMf qpair back into error state so recovery
-	   will trigger disconnect */
-	rqpair->qpair.state = SPDK_NVMF_QPAIR_ERROR;
 	spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
 }
 
@@ -2014,7 +2013,7 @@ _spdk_nvmf_rdma_qp_error(void *arg)
 	struct spdk_nvmf_rdma_qpair	*rqpair = arg;
 	struct spdk_nvmf_rdma_request	*rdma_req, *req_tmp;
 
-	rqpair->qpair.state = SPDK_NVMF_QPAIR_ERROR;
+	spdk_nvmf_rdma_update_ibv_state(rqpair);
 
 	if (spdk_nvmf_qpair_is_admin_queue(&rqpair->qpair)) {
 		spdk_nvmf_ctrlr_abort_aer(rqpair->qpair.ctrlr);
@@ -2056,6 +2055,9 @@ spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
 
 	rqpair = event.element.qp->qp_context;
 
+	/* This call is thread-safe. Immediately update the IBV state on error notification. */
+	spdk_nvmf_rdma_update_ibv_state(rqpair);
+
 	switch (event.event_type) {
 	case IBV_EVENT_QP_FATAL:
 	case IBV_EVENT_QP_LAST_WQE_REACHED:
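The hunk above hooks into IB asynchronous event processing. Below is a generic sketch of that handler shape using the standard libibverbs calls; SPDK's real function takes its own device structure and handles more cases, so treat the function name and body as illustrative only.

#include <infiniband/verbs.h>
#include <stdio.h>

/* Generic shape of an IB async-event handler like
 * spdk_nvmf_process_ib_event(): get the event, recover the owning
 * qpair from qp_context, refresh the cached state, dispatch, ack. */
static int
process_ib_event_once(struct ibv_context *ctx)
{
	struct ibv_async_event event;

	if (ibv_get_async_event(ctx, &event)) {
		return -1;
	}

	switch (event.event_type) {
	case IBV_EVENT_QP_FATAL:
	case IBV_EVENT_QP_LAST_WQE_REACHED:
		/* In SPDK, event.element.qp->qp_context holds the owning
		 * qpair; here we only log the event. */
		printf("QP error event %d\n", (int)event.event_type);
		break;
	default:
		break;
	}

	ibv_ack_async_event(&event);
	return 0;
}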
@@ -2279,7 +2281,7 @@ spdk_nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
 		return -1;
 	}
 
-	spdk_nvmf_rdma_get_ibv_state(rqpair);
+	spdk_nvmf_rdma_update_ibv_state(rqpair);
 
 	return 0;
 }
@@ -2359,30 +2361,25 @@ spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
 	struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair,
 			struct spdk_nvmf_rdma_qpair, qpair);
 
-	switch (rqpair->qpair.state) {
-	case SPDK_NVMF_QPAIR_ERROR:
-		/* Mark request as COMPLETED for ERROR state
-		 * so RDMA transfer is not kicked off
-		 */
-		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
-		spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+	if (rqpair->ibv_attr.qp_state != IBV_QPS_ERR) {
+		/* The connection is alive, so process the request as normal */
+		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTED);
+	} else {
+		/* The connection is dead. Move the request directly to the completed state. */
+		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+	}
 
-		/* QP in ERROR state is awaiting for all requests to be
-		 * completed by bdev layer
-		 */
+	spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+
+	if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE &&
+	    rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+		/* If the NVMe-oF layer thinks the connection is active, but the RDMA layer thinks
+		 * the connection is dead, check if this is the final I/O to complete and perform
+		 * error recovery. */
 		if (spdk_nvmf_rdma_qpair_is_idle(&rqpair->qpair)) {
 			spdk_nvmf_rdma_qp_drained(rqpair);
 		}
-		break;
-	case SPDK_NVMF_QPAIR_INACTIVE:
-	case SPDK_NVMF_QPAIR_ACTIVATING:
-	case SPDK_NVMF_QPAIR_ACTIVE:
-	case SPDK_NVMF_QPAIR_DEACTIVATING:
-	default:
-		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTED);
-		spdk_nvmf_rdma_request_process(rtransport, rdma_req);
-		break;
 	}
 
 	return 0;
 }