nvme/rdma: Handle failed send/recv as a fatal error
Do not attempt to resubmit a failed send/recv WR; instead, report an error to the upper layer (in the case of a new request) or fail the qpair (in the case of active polling). If ibv_post_send fails and `delay_cmd_submit` is disabled, nvme_rdma_qpair_submit_request returns an error to the caller. The caller completes the failed request, but the RDMA layer still keeps it in the send queue. Later, the RDMA layer can send the corresponding WR and notify the upper layer about the completion of the request a second time. Change-Id: I1260f215b8523d39157a5cc3fda39cd4bd87c8ec Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1662 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Seth Howell <seth.howell@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
8c6a345534
commit
9b86f31a38
@ -568,13 +568,6 @@ nvme_rdma_qpair_submit_sends(struct nvme_rdma_qpair *rqpair)
|
||||
if (spdk_unlikely(rc)) {
|
||||
SPDK_ERRLOG("Failed to post WRs on send queue, errno %d (%s), bad_wr %p\n",
|
||||
rc, spdk_strerror(rc), bad_send_wr);
|
||||
|
||||
/* Restart queue from bad wr. If it failed during
|
||||
* completion processing, controller will be moved to
|
||||
* failed state. Otherwise it will likely fail again
|
||||
* in next submit attempt from completion processing.
|
||||
*/
|
||||
spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, bad_send_wr);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -592,14 +585,9 @@ nvme_rdma_qpair_submit_recvs(struct nvme_rdma_qpair *rqpair)
|
||||
if (spdk_unlikely(rc)) {
|
||||
SPDK_ERRLOG("Failed to post WRs on receive queue, errno %d (%s), bad_wr %p\n",
|
||||
rc, spdk_strerror(rc), bad_recv_wr);
|
||||
/* Restart queue from bad wr. If it failed during
|
||||
* completion processing, controller will be moved to
|
||||
* failed state. Otherwise it will likely fail again
|
||||
* in next submit attempt from completion processing.
|
||||
*/
|
||||
rqpair->recvs_to_post.first = bad_recv_wr;
|
||||
return -1;
|
||||
return rc;
|
||||
}
|
||||
|
||||
rqpair->recvs_to_post.first = NULL;
|
||||
}
|
||||
return 0;
|
||||
@ -2101,8 +2089,10 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
|
||||
}
|
||||
} while (reaped < max_completions);
|
||||
|
||||
nvme_rdma_qpair_submit_sends(rqpair);
|
||||
nvme_rdma_qpair_submit_recvs(rqpair);
|
||||
if (spdk_unlikely(nvme_rdma_qpair_submit_sends(rqpair) ||
|
||||
nvme_rdma_qpair_submit_recvs(rqpair))) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (spdk_unlikely(rqpair->qpair.ctrlr->timeout_enabled)) {
|
||||
nvme_rdma_qpair_check_timeout(qpair);
|
||||
|
Loading…
Reference in New Issue
Block a user