nvme/rdma: Handle failed send/recv as a fatal error
Do not attempt to resubmit a failed send/recv WR; instead, report an error to the upper layer (in the case of a new request) or fail the qpair (in the case of active polling). When ibv_post_send fails and `delay_cmd_submit` is disabled, nvme_rdma_qpair_submit_request returns an error to the caller. The caller completes the failed request, but the RDMA layer still keeps it in the send queue. Later, the RDMA layer can send the corresponding WR and notify the upper layer about the completion of the request a second time. Change-Id: I1260f215b8523d39157a5cc3fda39cd4bd87c8ec Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1662 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Seth Howell <seth.howell@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
8c6a345534
commit
9b86f31a38
@ -568,13 +568,6 @@ nvme_rdma_qpair_submit_sends(struct nvme_rdma_qpair *rqpair)
|
|||||||
if (spdk_unlikely(rc)) {
|
if (spdk_unlikely(rc)) {
|
||||||
SPDK_ERRLOG("Failed to post WRs on send queue, errno %d (%s), bad_wr %p\n",
|
SPDK_ERRLOG("Failed to post WRs on send queue, errno %d (%s), bad_wr %p\n",
|
||||||
rc, spdk_strerror(rc), bad_send_wr);
|
rc, spdk_strerror(rc), bad_send_wr);
|
||||||
|
|
||||||
/* Restart queue from bad wr. If it failed during
|
|
||||||
* completion processing, controller will be moved to
|
|
||||||
* failed state. Otherwise it will likely fail again
|
|
||||||
* in next submit attempt from completion processing.
|
|
||||||
*/
|
|
||||||
spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, bad_send_wr);
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -592,14 +585,9 @@ nvme_rdma_qpair_submit_recvs(struct nvme_rdma_qpair *rqpair)
|
|||||||
if (spdk_unlikely(rc)) {
|
if (spdk_unlikely(rc)) {
|
||||||
SPDK_ERRLOG("Failed to post WRs on receive queue, errno %d (%s), bad_wr %p\n",
|
SPDK_ERRLOG("Failed to post WRs on receive queue, errno %d (%s), bad_wr %p\n",
|
||||||
rc, spdk_strerror(rc), bad_recv_wr);
|
rc, spdk_strerror(rc), bad_recv_wr);
|
||||||
/* Restart queue from bad wr. If it failed during
|
return rc;
|
||||||
* completion processing, controller will be moved to
|
|
||||||
* failed state. Otherwise it will likely fail again
|
|
||||||
* in next submit attempt from completion processing.
|
|
||||||
*/
|
|
||||||
rqpair->recvs_to_post.first = bad_recv_wr;
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rqpair->recvs_to_post.first = NULL;
|
rqpair->recvs_to_post.first = NULL;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
@ -2101,8 +2089,10 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
|
|||||||
}
|
}
|
||||||
} while (reaped < max_completions);
|
} while (reaped < max_completions);
|
||||||
|
|
||||||
nvme_rdma_qpair_submit_sends(rqpair);
|
if (spdk_unlikely(nvme_rdma_qpair_submit_sends(rqpair) ||
|
||||||
nvme_rdma_qpair_submit_recvs(rqpair);
|
nvme_rdma_qpair_submit_recvs(rqpair))) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
if (spdk_unlikely(rqpair->qpair.ctrlr->timeout_enabled)) {
|
if (spdk_unlikely(rqpair->qpair.ctrlr->timeout_enabled)) {
|
||||||
nvme_rdma_qpair_check_timeout(qpair);
|
nvme_rdma_qpair_check_timeout(qpair);
|
||||||
|
Loading…
Reference in New Issue
Block a user