nvmf: Re-post the capsule immediately upon sending a completion

The target can only provide updates to sq_head inside of
completions. Therefore, we must update sq_head prior to sending
the completion, or the host will incorrectly conclude that the
queue is full.

Change-Id: If2925d39570bbc247801219f352e690d33132a2d
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Author: Ben Walker <benjamin.walker@intel.com>
Date:   2016-07-27 10:11:28 -07:00
Parent: 7e23841d28
Commit: eee64c69f7

2 changed files with 99 additions and 47 deletions
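In NVMe over Fabrics the host has no direct view of the target's submission queue consumption; the sqhd field carried in each completion capsule is its only source. The sketch below uses simplified stand-in types (not the actual SPDK structures) to show the ordering this commit enforces: advance sq_head and stamp it into the completion before the completion is posted.

#include <stdint.h>

/* Simplified stand-ins for the SPDK structures touched by this commit. */
struct toy_cpl  { uint16_t sqhd; /* ...remaining completion fields... */ };
struct toy_conn { uint16_t sq_head; uint16_t queue_depth; };

/* Advance sq_head first, then stamp it into the completion. If the
 * completion were sent with the old value, the host would see a stale
 * sq_head and could conclude the queue is full when it is not. */
static void
toy_send_completion(struct toy_conn *conn, struct toy_cpl *rsp)
{
    conn->sq_head++;
    if (conn->sq_head == conn->queue_depth) {
        conn->sq_head = 0;
    }
    rsp->sqhd = conn->sq_head;
    /* ...then re-post the recv capsule and post the RDMA send... */
}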

File 1 of 2:

@@ -440,51 +440,110 @@ nvmf_post_rdma_send(struct spdk_nvmf_request *req)
     return rc;
 }
 
+/**
+ * REQUEST COMPLETION HANDLING
+ *
+ * Request completion consists of three steps:
+ *
+ * 1) Transfer any data to the host using an RDMA Write. If no data,
+ *    skip this step. (spdk_nvmf_rdma_request_transfer_data)
+ * 2) Upon transfer completion, update sq_head, re-post the recv capsule,
+ *    and send the completion. (spdk_nvmf_rdma_request_send_completion)
+ * 3) Upon getting acknowledgement of the completion, decrement the internal
+ *    count of number of outstanding requests. (spdk_nvmf_rdma_request_ack_completion)
+ *
+ * There are two public interfaces to initiate the process of completing a request,
+ * exposed as callbacks in the transport layer.
+ *
+ * 1) spdk_nvmf_rdma_request_complete, which attempts to do all three steps.
+ * 2) spdk_nvmf_rdma_request_release, which skips straight to step 3.
+ **/
+
 static int
-spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
+spdk_nvmf_rdma_request_transfer_data(struct spdk_nvmf_request *req)
 {
-    struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
-    int ret;
-
-    if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
-        req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
-        /* Need to transfer data via RDMA Write */
-        ret = nvmf_post_rdma_write(req);
-        if (ret) {
-            SPDK_ERRLOG("Unable to post rdma write tx descriptor\n");
-            return -1;
-        }
-    } else {
-        /* Send the completion */
-        ret = nvmf_post_rdma_send(req);
-        if (ret) {
-            SPDK_ERRLOG("Unable to send response capsule\n");
-            return -1;
-        }
+    if (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+        return nvmf_post_rdma_write(req);
+    } else if (req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+        return nvmf_post_rdma_read(req);
     }
 
-    return 0;
+    /* This should not have been called if there is no data to xfer */
+    assert(false);
+    return -1;
 }
 
 static int
-spdk_nvmf_rdma_request_release(struct spdk_nvmf_request *req)
+spdk_nvmf_rdma_request_send_completion(struct spdk_nvmf_request *req)
+{
+    int rc;
+    struct spdk_nvmf_conn *conn = req->conn;
+    struct spdk_nvmf_rdma_conn *rdma_conn = get_rdma_conn(conn);
+    struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+    /* Advance our sq_head pointer */
+    conn->sq_head++;
+    if (conn->sq_head == rdma_conn->queue_depth) {
+        conn->sq_head = 0;
+    }
+    rsp->sqhd = conn->sq_head;
+
+    /* Post the capsule to the recv buffer */
+    rc = nvmf_post_rdma_recv(req);
+    if (rc) {
+        SPDK_ERRLOG("Unable to re-post rx descriptor\n");
+        return rc;
+    }
+
+    /* Send the completion */
+    rc = nvmf_post_rdma_send(req);
+    if (rc) {
+        SPDK_ERRLOG("Unable to send response capsule\n");
+    }
+
+    return rc;
+}
+
+static int
+spdk_nvmf_rdma_request_ack_completion(struct spdk_nvmf_request *req)
 {
     struct spdk_nvmf_conn *conn = req->conn;
     struct spdk_nvmf_rdma_conn *rdma_conn = get_rdma_conn(conn);
 
-    if (nvmf_post_rdma_recv(req)) {
-        SPDK_ERRLOG("Unable to re-post rx descriptor\n");
-        return -1;
-    }
-
+    /* Advance our sq_head pointer */
     conn->sq_head++;
     if (conn->sq_head == rdma_conn->queue_depth) {
         conn->sq_head = 0;
     }
 
+    rdma_conn->num_outstanding_reqs--;
+
     return 0;
 }
+
+static int
+spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
+{
+    struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+    int rc;
+
+    if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
+        req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+        rc = spdk_nvmf_rdma_request_transfer_data(req);
+    } else {
+        rc = spdk_nvmf_rdma_request_send_completion(req);
+    }
+
+    return rc;
+}
+
+static int
+spdk_nvmf_rdma_request_release(struct spdk_nvmf_request *req)
+{
+    return spdk_nvmf_rdma_request_ack_completion(req);
+}
 
 static int
 nvmf_rdma_connect(struct rdma_cm_event *event)
 {
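A detail worth noting in spdk_nvmf_rdma_request_send_completion above, and the source of the commit title: the recv capsule is re-posted before the completion send is issued. Once the host sees a completion it may immediately reuse the freed submission queue slot, so a receive buffer must already be posted by that point. A rough sketch of the invariant, with stub helpers standing in for nvmf_post_rdma_recv() and nvmf_post_rdma_send():

/* Stubs standing in for the real posting helpers in this file;
 * bodies reduced to no-ops so the sketch stands alone. */
static int toy_post_recv(void *capsule) { (void)capsule; return 0; }
static int toy_post_send(void *cpl)     { (void)cpl;     return 0; }

static int
toy_complete_request(void *capsule, void *cpl)
{
    /* Make the capsule buffer available for the host's next command
     * before the host can learn that this queue slot is free again. */
    if (toy_post_recv(capsule) != 0) {
        return -1;
    }
    /* Only now send the completion that frees the slot. */
    return toy_post_send(cpl);
}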
@@ -555,11 +614,11 @@ nvmf_rdma_connect(struct rdma_cm_event *event)
         queue_depth = nvmf_min(queue_depth, private_data->hsqsize);
     }
 
-    /* Queue Depth cannot exceed R/W depth */
-    queue_depth = nvmf_min(queue_depth, rw_depth);
-
     SPDK_TRACELOG(SPDK_TRACE_RDMA, "Final Negotiated Queue Depth: %d R/W Depth: %d\n",
                   queue_depth, rw_depth);
 
+    /* TEMPORARY: Limit the queue_depth to the rw_depth due to lack of queueing */
+    queue_depth = rw_depth;
+
     /* Init the NVMf rdma transport connection */
     rdma_conn = spdk_nvmf_rdma_conn_create(event->id, queue_depth, rw_depth);
@@ -1065,8 +1124,8 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
         }
 
         if (wc.status) {
-            SPDK_ERRLOG("Send CQ error (%d): %s\n",
-                        wc.status, ibv_wc_status_str(wc.status));
+            SPDK_ERRLOG("Send CQ error on Connection %p, Request 0x%lu (%d): %s\n",
+                        conn, wc.wr_id, wc.status, ibv_wc_status_str(wc.status));
             return -1;
         }
@@ -1081,11 +1140,11 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
         switch (wc.opcode) {
         case IBV_WC_SEND:
             assert(rdma_conn->num_outstanding_reqs > 0);
-            rdma_conn->num_outstanding_reqs--;
             SPDK_TRACELOG(SPDK_TRACE_RDMA,
                           "RDMA SEND Complete. Request: %p Connection: %p Outstanding I/O: %d\n",
-                          req, conn, rdma_conn->num_outstanding_reqs);
-            if (spdk_nvmf_rdma_request_release(req)) {
+                          req, conn, rdma_conn->num_outstanding_reqs - 1);
+            rc = spdk_nvmf_rdma_request_ack_completion(req);
+            if (rc) {
                 return -1;
             }
             break;
@@ -1094,9 +1153,8 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
             SPDK_TRACELOG(SPDK_TRACE_RDMA, "RDMA WRITE Complete. Request: %p Connection: %p\n",
                           req, conn);
             spdk_trace_record(TRACE_RDMA_WRITE_COMPLETE, 0, 0, (uint64_t)req, 0);
-            /* Send the completion */
-            if (nvmf_post_rdma_send(req)) {
-                SPDK_ERRLOG("Unable to send response capsule\n");
+            rc = spdk_nvmf_rdma_request_send_completion(req);
+            if (rc) {
                 return -1;
             }
             break;
@@ -1107,7 +1165,6 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
             spdk_trace_record(TRACE_RDMA_READ_COMPLETE, 0, 0, (uint64_t)req, 0);
             rc = spdk_nvmf_request_exec(req);
             if (rc) {
-                SPDK_ERRLOG("request_exec error %d after RDMA Read completion\n", rc);
                 return -1;
             }
             count++;
@@ -1165,21 +1222,17 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
         memset(req->rsp, 0, sizeof(*req->rsp));
         rc = spdk_nvmf_request_prep_data(req);
         if (rc < 0) {
-            SPDK_ERRLOG("prep_data failed\n");
-            return spdk_nvmf_request_complete(req);
+            return spdk_nvmf_rdma_request_complete(req);
         } else if (rc == 0) {
             /* Data is immediately available */
             rc = spdk_nvmf_request_exec(req);
             if (rc < 0) {
-                SPDK_ERRLOG("Command execution failed\n");
                 return -1;
             }
             count++;
         } else {
-            /* Start transfer of data from host to target */
-            rc = nvmf_post_rdma_read(req);
+            rc = spdk_nvmf_rdma_request_transfer_data(req);
             if (rc) {
                 return -1;
             }
         }
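For context on the queue-full scenario the commit message describes: the host derives the number of free submission-queue slots from the sqhd values it receives, using ordinary circular-queue arithmetic. The following host-side sketch is purely illustrative (invented names, not SPDK code); with a stale sqhd the check below reports full one slot too early.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical host-side bookkeeping; the host only learns `head`
 * from the sqhd field of completions it receives. */
struct host_sq {
    uint16_t tail;        /* next slot the host will fill */
    uint16_t head;        /* last sqhd reported by the target */
    uint16_t queue_depth;
};

/* Full when advancing the tail would collide with the head. If the
 * target reports a sq_head that lags the truth, this returns true
 * even though a slot has actually been consumed and freed. */
static bool
host_sq_full(const struct host_sq *sq)
{
    return (uint16_t)((sq->tail + 1) % sq->queue_depth) == sq->head;
}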

File 2 of 2:

@@ -57,12 +57,11 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
     response->sqid = 0;
     response->status.p = 0;
-    response->sqhd = req->conn->sq_head;
     response->cid = req->cmd->nvme_cmd.cid;
 
     SPDK_TRACELOG(SPDK_TRACE_NVMF,
-                  "cpl: cid=%u cdw0=0x%08x rsvd1=%u sqhd=%u status=0x%04x\n",
-                  response->cid, response->cdw0, response->rsvd1, response->sqhd,
+                  "cpl: cid=%u cdw0=0x%08x rsvd1=%u status=0x%04x\n",
+                  response->cid, response->cdw0, response->rsvd1,
                   *(uint16_t *)&response->status);
 
     if (req->conn->transport->req_complete(req)) {
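The net effect of this second hunk is that the generic request layer no longer writes sqhd at all; the field becomes the transport's responsibility, since only the transport knows when sq_head actually advances. A minimal sketch of the resulting split, with invented names standing in for struct spdk_nvme_cpl and the generic completion path:

#include <stdint.h>

struct toy_cpl { uint16_t cid; uint16_t sqid; uint16_t sqhd; };

/* Generic layer after this commit: fills in the transport-independent
 * fields and leaves sqhd alone. */
static void
toy_generic_complete(struct toy_cpl *cpl, uint16_t cid)
{
    cpl->sqid = 0;
    cpl->cid = cid;
    /* cpl->sqhd is stamped later, by the transport, at the moment
     * sq_head is advanced (see spdk_nvmf_rdma_request_send_completion). */
}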