nvmf/rdma: issue fused cmds consecutively to tgt layer
RDMA READs can cause cmds to be submitted to the target layer in a
different order than they were received from the host. Normally this
is fine, but not for fused commands.

So track fused commands as they reach nvmf_rdma_request_process(). If
we find a pair of sequential commands that don't have valid FUSED
settings (i.e. NONE/SECOND, FIRST/NONE, FIRST/FIRST), we mark the
requests as "fused_failed" and will later fail them just before they
would normally be sent to the target layer. When we do find a pair of
valid fused commands (FIRST followed by SECOND), we wait until both
are READY_TO_EXECUTE, and then submit them to the target layer
consecutively.

This fixes issue #2428 for the RDMA transport.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I01ebf90e17761499fb6601456811f442dc2a2950
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12018
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent c8802892ab
commit 183c348557

lib/nvmf/rdma.c | 121 changed lines
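The ordering rules the commit message describes can be condensed into a small
standalone model. This is a hypothetical sketch, not SPDK code: the enum values
mirror the two-bit NVMe fuse command field (NONE = 0, FIRST = 1, SECOND = 2),
and fuse_sequence_valid() is an invented helper that captures which
back-to-back fuse settings the patch treats as valid.

/* Minimal standalone model of the fused-command ordering rules; not SPDK code. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum cmd_fuse {
	FUSE_NONE   = 0,	/* normal (unfused) command */
	FUSE_FIRST  = 1,	/* first command of a fused pair */
	FUSE_SECOND = 2,	/* second command of a fused pair */
};

/* Returns true if 'next' may follow 'last' on the same queue pair. The
 * invalid sequences (NONE/SECOND, FIRST/NONE, FIRST/FIRST) are exactly the
 * ones the patch marks as fused_failed.
 */
static bool
fuse_sequence_valid(enum cmd_fuse last, enum cmd_fuse next)
{
	if (last == FUSE_FIRST) {
		return next == FUSE_SECOND;	/* a FIRST must be followed by a SECOND */
	}
	return next != FUSE_SECOND;		/* a SECOND must be preceded by a FIRST */
}

int
main(void)
{
	assert(fuse_sequence_valid(FUSE_NONE, FUSE_NONE));	/* common fast path */
	assert(fuse_sequence_valid(FUSE_FIRST, FUSE_SECOND));	/* valid fused pair */
	assert(!fuse_sequence_valid(FUSE_NONE, FUSE_SECOND));	/* orphaned SECOND */
	assert(!fuse_sequence_valid(FUSE_FIRST, FUSE_NONE));	/* abandoned FIRST */
	assert(!fuse_sequence_valid(FUSE_FIRST, FUSE_FIRST));	/* abandoned FIRST */
	printf("fuse ordering rules hold\n");
	return 0;
}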
@@ -264,6 +264,9 @@ struct spdk_nvmf_rdma_request {
 	uint32_t				num_outstanding_data_wr;
 	uint64_t				receive_tsc;
 
+	bool					fused_failed;
+	struct spdk_nvmf_rdma_request		*fused_pair;
+
 	STAILQ_ENTRY(spdk_nvmf_rdma_request)	state_link;
 };
 
@@ -380,6 +383,12 @@ struct spdk_nvmf_rdma_qpair {
 	 */
 	enum ibv_qp_state			ibv_state;
 
+	/* Points to the request that has fuse bits set to
+	 * SPDK_NVME_CMD_FUSE_FIRST, when the qpair is waiting
+	 * for the request that has SPDK_NVME_CMD_FUSE_SECOND.
+	 */
+	struct spdk_nvmf_rdma_request		*fused_first;
+
 	/*
 	 * io_channel which is used to destroy qpair when it is removed from poll group
 	 */
@@ -1938,6 +1947,19 @@ _nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
 	rdma_req->data.wr.next = NULL;
 	rdma_req->offset = 0;
 	rdma_req->req.dif_enabled = false;
+	rdma_req->fused_failed = false;
+	if (rdma_req->fused_pair) {
+		/* This req was part of a valid fused pair, but failed before it got to
+		 * READY_TO_EXECUTE state. This means we need to fail the other request
+		 * in the pair, because it is no longer part of a valid pair. If the pair
+		 * already reached READY_TO_EXECUTE state, we need to kick it.
+		 */
+		rdma_req->fused_pair->fused_failed = true;
+		if (rdma_req->fused_pair->state == RDMA_REQUEST_STATE_READY_TO_EXECUTE) {
+			nvmf_rdma_request_process(rtransport, rdma_req->fused_pair);
+		}
+		rdma_req->fused_pair = NULL;
+	}
 	memset(&rdma_req->req.dif, 0, sizeof(rdma_req->req.dif));
 	rqpair->qd--;
 
@@ -1945,6 +1967,57 @@ _nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
 	rdma_req->state = RDMA_REQUEST_STATE_FREE;
 }
 
+static void
+nvmf_rdma_check_fused_ordering(struct spdk_nvmf_rdma_transport *rtransport,
+			       struct spdk_nvmf_rdma_qpair *rqpair,
+			       struct spdk_nvmf_rdma_request *rdma_req)
+{
+	enum spdk_nvme_cmd_fuse last, next;
+
+	last = rqpair->fused_first ? rqpair->fused_first->req.cmd->nvme_cmd.fuse : SPDK_NVME_CMD_FUSE_NONE;
+	next = rdma_req->req.cmd->nvme_cmd.fuse;
+
+	assert(last != SPDK_NVME_CMD_FUSE_SECOND);
+
+	if (spdk_likely(last == SPDK_NVME_CMD_FUSE_NONE && next == SPDK_NVME_CMD_FUSE_NONE)) {
+		return;
+	}
+
+	if (last == SPDK_NVME_CMD_FUSE_FIRST) {
+		if (next == SPDK_NVME_CMD_FUSE_SECOND) {
+			/* This is a valid pair of fused commands. Point them at each other
+			 * so they can be submitted consecutively once ready to be executed.
+			 */
+			rqpair->fused_first->fused_pair = rdma_req;
+			rdma_req->fused_pair = rqpair->fused_first;
+			rqpair->fused_first = NULL;
+			return;
+		} else {
+			/* Mark the last req as failed since it wasn't followed by a SECOND. */
+			rqpair->fused_first->fused_failed = true;
+
+			/* If the last req is in READY_TO_EXECUTE state, then call
+			 * nvmf_rdma_request_process(), otherwise nothing else will kick it.
+			 */
+			if (rqpair->fused_first->state == RDMA_REQUEST_STATE_READY_TO_EXECUTE) {
+				nvmf_rdma_request_process(rtransport, rqpair->fused_first);
+			}
+
+			rqpair->fused_first = NULL;
+		}
+	}
+
+	if (next == SPDK_NVME_CMD_FUSE_FIRST) {
+		/* Set rqpair->fused_first here so that we know to check that the next request
+		 * is a SECOND (and to fail this one if it isn't).
+		 */
+		rqpair->fused_first = rdma_req;
+	} else if (next == SPDK_NVME_CMD_FUSE_SECOND) {
+		/* Mark this req failed since it is a SECOND and the last one was not a FIRST. */
+		rdma_req->fused_failed = true;
+	}
+}
+
 bool
 nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 			  struct spdk_nvmf_rdma_request *rdma_req)
@@ -2008,6 +2081,8 @@ nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 			rdma_req->req.dif_enabled = true;
 		}
 
+		nvmf_rdma_check_fused_ordering(rtransport, rqpair, rdma_req);
+
 #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
 		rdma_req->rsp.wr.opcode = IBV_WR_SEND;
 		rdma_req->rsp.wr.imm_data = 0;
@@ -2129,8 +2204,54 @@ nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 			rdma_req->req.length = rdma_req->req.dif.elba_length;
 		}
 
+		if (rdma_req->req.cmd->nvme_cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) {
+			if (rdma_req->fused_failed) {
+				/* This request failed FUSED semantics. Fail it immediately, without
+				 * even sending it to the target layer.
+				 */
+				rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+				rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
+				rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+				break;
+			}
+
+			if (rdma_req->fused_pair == NULL ||
+			    rdma_req->fused_pair->state != RDMA_REQUEST_STATE_READY_TO_EXECUTE) {
+				/* This request is ready to execute, but either we don't know yet if it's
+				 * valid - i.e. this is a FIRST but we haven't received the next
+				 * request yet - or the other request of this fused pair isn't ready to
+				 * execute. So break here; this request will get processed later, either
+				 * when the other request is ready or when we find that this request isn't valid.
+				 */
+				break;
+			}
+		}
+
+		/* If we get to this point, and this request is a fused command, we know that
+		 * it is part of a valid sequence (FIRST followed by a SECOND) and that both
+		 * requests are READY_TO_EXECUTE. So call spdk_nvmf_request_exec() both on this
+		 * request and the other request of the fused pair, in the correct order.
+		 * Also clear the ->fused_pair pointers on both requests, since after this point
+		 * we no longer need to maintain the relationship between these two requests.
+		 */
+		if (rdma_req->req.cmd->nvme_cmd.fuse == SPDK_NVME_CMD_FUSE_SECOND) {
+			assert(rdma_req->fused_pair != NULL);
+			assert(rdma_req->fused_pair->fused_pair != NULL);
+			rdma_req->fused_pair->state = RDMA_REQUEST_STATE_EXECUTING;
+			spdk_nvmf_request_exec(&rdma_req->fused_pair->req);
+			rdma_req->fused_pair->fused_pair = NULL;
+			rdma_req->fused_pair = NULL;
+		}
 		rdma_req->state = RDMA_REQUEST_STATE_EXECUTING;
 		spdk_nvmf_request_exec(&rdma_req->req);
+		if (rdma_req->req.cmd->nvme_cmd.fuse == SPDK_NVME_CMD_FUSE_FIRST) {
+			assert(rdma_req->fused_pair != NULL);
+			assert(rdma_req->fused_pair->fused_pair != NULL);
+			rdma_req->fused_pair->state = RDMA_REQUEST_STATE_EXECUTING;
+			spdk_nvmf_request_exec(&rdma_req->fused_pair->req);
+			rdma_req->fused_pair->fused_pair = NULL;
+			rdma_req->fused_pair = NULL;
+		}
 		break;
 	case RDMA_REQUEST_STATE_EXECUTING:
 		spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTING, 0, 0,
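For context, the canonical consumer of fused commands is NVMe compare-and-write:
a Compare (opcode 0x05) submitted with fuse = FIRST, immediately followed by a
Write (opcode 0x01) with fuse = SECOND over the same LBA range, which the
controller executes as an atomic unit. A minimal sketch of what such a pair
looks like, using a simplified local header struct rather than SPDK's
spdk_nvme_cmd (field layout reduced here for illustration):

/* Illustrative only: a reduced command header showing how a host marks a
 * fused compare-and-write pair. Not the SPDK spdk_nvme_cmd definition.
 */
#include <stdint.h>
#include <stdio.h>

#define OPC_WRITE   0x01	/* NVMe Write opcode */
#define OPC_COMPARE 0x05	/* NVMe Compare opcode */

enum { FUSE_NONE = 0, FUSE_FIRST = 1, FUSE_SECOND = 2 };

struct cmd_hdr {
	uint8_t  opc;	/* opcode */
	uint8_t  fuse;	/* 2-bit fuse field in the real command layout */
	uint16_t cid;	/* command identifier */
	uint64_t slba;	/* starting LBA */
	uint16_t nlb;	/* number of logical blocks, 0's based */
};

int
main(void)
{
	/* Both halves must target the same LBA range and be submitted back to
	 * back on the same submission queue. That adjacency is what RDMA READ
	 * completion reordering can break on the target side, and what the
	 * patch restores before calling into the target layer.
	 */
	struct cmd_hdr compare_cmd = { .opc = OPC_COMPARE, .fuse = FUSE_FIRST,
				       .cid = 1, .slba = 0x1000, .nlb = 0 };
	struct cmd_hdr write_cmd   = { .opc = OPC_WRITE, .fuse = FUSE_SECOND,
				       .cid = 2, .slba = 0x1000, .nlb = 0 };

	printf("fused pair: opc=0x%02x fuse=%d -> opc=0x%02x fuse=%d\n",
	       (unsigned)compare_cmd.opc, compare_cmd.fuse,
	       (unsigned)write_cmd.opc, write_cmd.fuse);
	return 0;
}

If the two halves reach the target layer separated or out of order, the pair
is no longer atomic - which is why this patch forces consecutive submission.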