rdma: batch rdma sends.
By batching ibv sends each time we poll, we can reduce the number of MMIO writes that we do.

Change-Id: Ia5a07b0037365abfa8732629c34d34a9ed49ac70
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/449253
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 4c03cae6f5
commit 9d63933b7f

lib/nvmf/rdma.c: 149 changed lines
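The pattern the patch introduces, in a minimal standalone sketch (illustrative names; the SPDK equivalents added below are struct spdk_nvmf_send_wr_list, nvmf_rdma_qpair_queue_send_wrs() and _poller_submit_sends()): instead of calling ibv_post_send() once per request, each request's chain of send WRs is appended to a per-queue-pair list, and the poller flushes the whole list with one ibv_post_send() per qpair per poll. Each ibv_post_send() call ends with a doorbell write to the device (an MMIO write), so posting N chained WRs in one call rings the doorbell once instead of N times.

#include <stddef.h>
#include <infiniband/verbs.h>

/* Illustrative only: a per-queue-pair list of send WRs waiting to be posted. */
struct pending_sends {
	struct ibv_send_wr *first;
	struct ibv_send_wr *last;
};

/* Append a WR (or a pre-linked chain of WRs) instead of posting it immediately.
 * Mirrors nvmf_rdma_qpair_queue_send_wrs() in the diff below. */
static void
queue_send_wrs(struct pending_sends *sends, struct ibv_send_wr *first)
{
	struct ibv_send_wr *last = first;

	while (last->next != NULL) {
		last = last->next;
	}

	if (sends->first == NULL) {
		sends->first = first;
	} else {
		sends->last->next = first;
	}
	sends->last = last;
}

/* Flush the accumulated chain with a single ibv_post_send() call, i.e. one
 * doorbell write, regardless of how many WRs were queued since the last poll.
 * Mirrors _poller_submit_sends(); error handling is omitted in this sketch. */
static int
flush_send_wrs(struct ibv_qp *qp, struct pending_sends *sends)
{
	struct ibv_send_wr *bad_wr = NULL;
	int rc = 0;

	if (sends->first != NULL) {
		rc = ibv_post_send(qp, sends->first, &bad_wr);
		sends->first = NULL;
		sends->last = NULL;
	}
	return rc;
}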
@@ -286,6 +286,11 @@ struct spdk_nvmf_rdma_resource_opts {
 	bool		shared;
 };
 
+struct spdk_nvmf_send_wr_list {
+	struct ibv_send_wr	*first;
+	struct ibv_send_wr	*last;
+};
+
 struct spdk_nvmf_rdma_resources {
 	/* Array of size "max_queue_depth" containing RDMA requests. */
 	struct spdk_nvmf_rdma_request	*reqs;
@@ -356,6 +361,9 @@ struct spdk_nvmf_rdma_qpair {
 	/* The maximum number of SGEs per WR on the recv queue */
 	uint32_t			max_recv_sge;
 
+	/* The list of pending send requests for a transfer */
+	struct spdk_nvmf_send_wr_list	sends_to_post;
+
 	struct spdk_nvmf_rdma_resources	*resources;
 
 	STAILQ_HEAD(, spdk_nvmf_rdma_request)	pending_rdma_read_queue;
@@ -619,7 +627,8 @@ nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req,
 	while (next_data_wr) {
 		current_data_wr = next_data_wr;
 		send_wr = current_data_wr->wr.next;
-		if (send_wr != NULL && send_wr != &rdma_req->rsp.wr) {
+		if (send_wr != NULL && send_wr != &rdma_req->rsp.wr &&
+		    send_wr->wr_id == current_data_wr->wr.wr_id) {
 			next_data_wr = SPDK_CONTAINEROF(send_wr, struct spdk_nvmf_rdma_request_data, wr);
 		} else {
 			next_data_wr = NULL;
@@ -979,6 +988,9 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 	spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0);
 	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair);
 
+	rqpair->sends_to_post.first = NULL;
+	rqpair->sends_to_post.last = NULL;
+
 	if (rqpair->poller->srq == NULL) {
 		rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
 		transport = &rtransport->transport;
@@ -1013,14 +1025,34 @@ error:
 	return -1;
 }
 
+
+/* Append the given send wr structure to the qpair's outstanding sends list. */
+/* This function accepts either a single wr or the first wr in a linked list. */
+static void
+nvmf_rdma_qpair_queue_send_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *first)
+{
+	struct ibv_send_wr *last;
+
+	last = first;
+	while (last->next != NULL) {
+		last = last->next;
+	}
+
+	if (rqpair->sends_to_post.first == NULL) {
+		rqpair->sends_to_post.first = first;
+		rqpair->sends_to_post.last = last;
+	} else {
+		rqpair->sends_to_post.last->next = first;
+		rqpair->sends_to_post.last = last;
+	}
+}
+
 static int
 request_transfer_in(struct spdk_nvmf_request *req)
 {
-	int				rc;
 	struct spdk_nvmf_rdma_request	*rdma_req;
 	struct spdk_nvmf_qpair		*qpair;
 	struct spdk_nvmf_rdma_qpair	*rqpair;
-	struct ibv_send_wr		*bad_wr = NULL;
 
 	qpair = req->qpair;
 	rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
@@ -1029,13 +1061,7 @@ request_transfer_in(struct spdk_nvmf_request *req)
 	assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
 	assert(rdma_req != NULL);
 
-	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA READ POSTED. Request: %p Connection: %p\n", req, qpair);
-
-	rc = ibv_post_send(rqpair->cm_id->qp, &rdma_req->data.wr, &bad_wr);
-	if (rc) {
-		SPDK_ERRLOG("Unable to transfer data from host to target\n");
-		return -1;
-	}
+	nvmf_rdma_qpair_queue_send_wrs(rqpair, &rdma_req->data.wr);
 	rqpair->current_read_depth += rdma_req->num_outstanding_data_wr;
 	rqpair->current_send_depth += rdma_req->num_outstanding_data_wr;
 	return 0;
@@ -1051,7 +1077,7 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
 	struct spdk_nvmf_rdma_qpair	*rqpair;
 	struct spdk_nvme_cpl		*rsp;
 	struct ibv_recv_wr		*bad_recv_wr = NULL;
-	struct ibv_send_wr		*send_wr, *bad_send_wr = NULL;
+	struct ibv_send_wr		*first = NULL;
 
 	*data_posted = 0;
 	qpair = req->qpair;
@@ -1089,24 +1115,15 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
 	 * RDMA WRITEs to transfer data, plus an RDMA SEND
 	 * containing the response.
 	 */
-	send_wr = &rdma_req->rsp.wr;
+	first = &rdma_req->rsp.wr;
 
 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
-		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA WRITE POSTED. Request: %p Connection: %p\n", req, qpair);
-		send_wr = &rdma_req->data.wr;
+		first = &rdma_req->data.wr;
 		*data_posted = 1;
 		num_outstanding_data_wr = rdma_req->num_outstanding_data_wr;
 	}
-
-	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA SEND POSTED. Request: %p Connection: %p\n", req, qpair);
-
-	/* Send the completion */
-	rc = ibv_post_send(rqpair->cm_id->qp, send_wr, &bad_send_wr);
-	if (rc) {
-		SPDK_ERRLOG("Unable to send response capsule\n");
-		return rc;
-	}
+	nvmf_rdma_qpair_queue_send_wrs(rqpair, first);
 	/* +1 for the rsp wr */
 	rqpair->current_send_depth += num_outstanding_data_wr + 1;
 
@@ -1797,6 +1814,7 @@ nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
 	rdma_req->req.length = 0;
 	rdma_req->req.iovcnt = 0;
 	rdma_req->req.data = NULL;
+	rdma_req->rsp.wr.next = NULL;
 	rdma_req->data.wr.next = NULL;
 	rqpair->qd--;
 
@@ -3190,6 +3208,88 @@ spdk_nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req)
 }
 #endif
 
+static void
+_qp_reset_failed_sends(struct spdk_nvmf_rdma_transport *rtransport,
+		       struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *bad_wr, int rc)
+{
+	struct spdk_nvmf_rdma_wr	*bad_rdma_wr;
+	struct spdk_nvmf_rdma_request	*prev_rdma_req = NULL, *cur_rdma_req = NULL;
+
+	SPDK_ERRLOG("Failed to post a send for the qpair %p with errno %d\n", rqpair, -rc);
+	for (; bad_wr != NULL; bad_wr = bad_wr->next) {
+		bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_wr->wr_id;
+		assert(rqpair->current_send_depth > 0);
+		rqpair->current_send_depth--;
+		switch (bad_rdma_wr->type) {
+		case RDMA_WR_TYPE_DATA:
+			cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, data.rdma_wr);
+			if (bad_wr->opcode == IBV_WR_RDMA_READ) {
+				assert(rqpair->current_read_depth > 0);
+				rqpair->current_read_depth--;
+			}
+			break;
+		case RDMA_WR_TYPE_SEND:
+			cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, rsp.rdma_wr);
+			break;
+		default:
+			SPDK_ERRLOG("Found a RECV in the list of pending SEND requests for qpair %p\n", rqpair);
+			prev_rdma_req = cur_rdma_req;
+			continue;
+		}
+
+		if (prev_rdma_req == cur_rdma_req) {
+			/* this request was handled by an earlier wr. i.e. we were performing an nvme read. */
+			/* We only have to check against prev_wr since each requests wrs are contiguous in this list. */
+			continue;
+		}
+
+		switch (cur_rdma_req->state) {
+		case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
+			cur_rdma_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+			cur_rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+			break;
+		case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
+		case RDMA_REQUEST_STATE_COMPLETING:
+			cur_rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+			break;
+		default:
+			SPDK_ERRLOG("Found a request in a bad state %d when draining pending SEND requests for qpair %p\n",
+				    cur_rdma_req->state, rqpair);
+			continue;
+		}
+
+		spdk_nvmf_rdma_request_process(rtransport, cur_rdma_req);
+		prev_rdma_req = cur_rdma_req;
+	}
+
+	if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) {
+		/* Disconnect the connection. */
+		spdk_nvmf_rdma_start_disconnect(rqpair);
+	}
+
+}
+
+static void
+_poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport,
+		     struct spdk_nvmf_rdma_poller *rpoller)
+{
+	struct spdk_nvmf_rdma_qpair	*rqpair;
+	struct ibv_send_wr		*bad_wr = NULL;
+	int				rc;
+
+	TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) {
+		if (rqpair->sends_to_post.first != NULL) {
+			rc = ibv_post_send(rqpair->cm_id->qp, rqpair->sends_to_post.first, &bad_wr);
+			/* bad wr always points to the first wr that failed. */
+			if (rc) {
+				_qp_reset_failed_sends(rtransport, rqpair, bad_wr, rc);
+			}
+			rqpair->sends_to_post.first = NULL;
+			rqpair->sends_to_post.last = NULL;
+		}
+	}
+}
+
 static int
 spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
 			   struct spdk_nvmf_rdma_poller *rpoller)
@@ -3319,6 +3419,9 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
 		return -1;
 	}
 
+	/* submit outstanding work requests. */
+	_poller_submit_sends(rtransport, rpoller);
+
 	return count;
 }
 
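For context on the error path added above: on failure, ibv_post_send() sets bad_wr to the first work request that could not be posted, and the WRs linked after it were not posted either, so _qp_reset_failed_sends() walks the chain from bad_wr, rolls back the send depth (and, for RDMA READ WRs, the read depth), and completes each affected request with an error. A minimal sketch of that walk; count_unposted_wrs() is a hypothetical helper, not part of the patch:

#include <stddef.h>
#include <infiniband/verbs.h>

/* Count the WRs that were never handed to the device, distinguishing RDMA
 * READs, which are the ones that also consumed read-queue depth. */
static unsigned int
count_unposted_wrs(struct ibv_send_wr *bad_wr, unsigned int *unposted_reads)
{
	struct ibv_send_wr *wr;
	unsigned int total = 0;

	*unposted_reads = 0;
	for (wr = bad_wr; wr != NULL; wr = wr->next) {
		total++;
		if (wr->opcode == IBV_WR_RDMA_READ) {
			(*unposted_reads)++;
		}
	}
	return total;
}

In rdma.c, each WR's wr_id encodes a struct spdk_nvmf_rdma_wr, which is how the handler recovers the owning request before failing it.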