nvmf/tcp: Wait for R2T send ack before processing H2C

Previously, the R2T was sent and, if an H2C arrived before the R2T
send was acknowledged, the H2C was processed anyway. Serialize
this process.

In practice, if the H2C arrives with a correctly functioning
initiator, that means the R2T already made it to the initiator.
But because the PDU hasn't been released yet, immediately processing the
PDU requires an extra PDU associated with the request. Basically, making
this change halves the worst-case number of PDUs required per
connection.

In the current sock layer implementations, it's not actually possible
for the R2T send ack to occur after that H2C arrives. But with the
upcoming addition of MSG_ZEROCOPY and other sock implementations, it's
best to fix this now.

Change-Id: Ifefaf48fcf2ff1dcc75e1686bbb9229b7ae3c219
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/479906
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Ben Walker 2020-01-09 12:20:43 -07:00 committed by Tomasz Zawadzki
parent 033ef363a9
commit 48a547fd82

View File

@ -71,6 +71,9 @@ enum spdk_nvmf_tcp_req_state {
/* The request is currently transferring data from the host to the controller. */
TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
/* The request is waiting for the R2T send acknowledgement. */
TCP_REQUEST_STATE_AWAITING_R2T_ACK,
/* The request is ready to execute at the block device */
TCP_REQUEST_STATE_READY_TO_EXECUTE,
@ -117,6 +120,7 @@ static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
#define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
#define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
#define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
#define TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC)
SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
{
@ -157,6 +161,9 @@ SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
spdk_trace_register_description("TCP_READ_DONE",
TRACE_TCP_READ_FROM_SOCKET_DONE,
OWNER_NONE, OBJECT_NONE, 0, 0, "");
spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK,
OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
}
struct spdk_nvmf_tcp_req {
@ -393,6 +400,7 @@ spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
}
static void
@ -750,6 +758,8 @@ spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
uint32_t mapped_length = 0;
ssize_t rc;
assert(&tqpair->pdu_in_progress != pdu);
hlen = pdu->hdr->common.hlen;
enable_digest = 1;
if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
@ -1220,6 +1230,33 @@ err:
spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
}
static int
nvmf_tcp_find_req_in_state(struct spdk_nvmf_tcp_qpair *tqpair,
			   enum spdk_nvmf_tcp_req_state state,
			   uint16_t cid, uint16_t tag,
			   struct spdk_nvmf_tcp_req **req)
{
	struct spdk_nvmf_tcp_req *iter;

	/* Walk the requests currently in the given state, looking for one whose
	 * command identifier matches. */
	TAILQ_FOREACH(iter, &tqpair->state_queue[state], state_link) {
		if (iter->req.cmd->nvme_cmd.cid != cid) {
			continue;
		}

		/* The cid matched. The transfer tag must also match; a cid match
		 * with a ttag mismatch is reported as an error. */
		if (iter->ttag == tag) {
			*req = iter;
			return 0;
		}

		*req = NULL;
		return -1;
	}

	/* Didn't find it, but not an error */
	*req = NULL;
	return 0;
}
static void
spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
struct spdk_nvmf_tcp_qpair *tqpair,
@ -1229,29 +1266,24 @@ spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
uint32_t error_offset = 0;
enum spdk_nvme_tcp_term_req_fes fes = 0;
struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
bool ttag_offset_error = false;
int rc;
h2c_data = &pdu->hdr->h2c_data;
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
/* According to the information in the pdu to find the req */
TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
state_link) {
if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
break;
}
if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
ttag_offset_error = true;
}
rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
h2c_data->cccid, h2c_data->ttag, &tcp_req);
if (rc == 0 && tcp_req == NULL) {
rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK, h2c_data->cccid,
h2c_data->ttag, &tcp_req);
}
if (!tcp_req) {
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
if (!ttag_offset_error) {
if (rc == 0) {
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
} else {
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
@ -1341,8 +1373,19 @@ static void
/* Callback invoked once the R2T PDU send has been acknowledged.
 * Releases the request's PDU, then moves the request into the
 * host-to-controller transfer state. If all of the H2C data already
 * arrived while the request was awaiting the R2T ack, advance the
 * request straight to READY_TO_EXECUTE and process it. */
spdk_nvmf_tcp_r2t_complete(void *cb_arg)
{
	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
	struct spdk_nvmf_tcp_transport *ttransport;
	/* Release the PDU that carried the R2T so it can be reused. */
	nvmf_tcp_req_pdu_fini(tcp_req);
	/* Recover the transport object from the request's qpair. */
	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
	struct spdk_nvmf_tcp_transport, transport);
	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
	/* The H2C data may have fully arrived before this ack; if so, the
	 * request is ready to execute now. */
	if (tcp_req->h2c_offset == tcp_req->req.length) {
		spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
		spdk_nvmf_tcp_req_process(ttransport, tcp_req);
	}
}
static void
@ -1369,6 +1412,8 @@ spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
r2t->r2to = tcp_req->h2c_offset;
r2t->r2tl = tcp_req->req.length;
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
"tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
@ -1391,7 +1436,10 @@ spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport
spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
if (tcp_req->h2c_offset == tcp_req->req.length) {
/* Wait for all of the data to arrive AND for the initial R2T PDU send to be
* acknowledged before moving on. */
if (tcp_req->h2c_offset == tcp_req->req.length &&
tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) {
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
spdk_nvmf_tcp_req_process(ttransport, tcp_req);
}
@ -2245,14 +2293,14 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
/* If data is transferring from host to controller, we need to do a transfer from the host. */
if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
if (tcp_req->req.data_from_pool) {
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
tcp_req->h2c_offset = 0;
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
} else {
struct nvme_tcp_pdu *pdu;
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
pdu = &tqpair->pdu_in_progress;
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
tqpair);
@ -2266,7 +2314,12 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
break;
case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 0, 0, (uintptr_t)tcp_req, 0);
/* The R2T completion or the h2c data incoming will kick it out of this state. */
break;
case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
(uintptr_t)tcp_req, 0);
/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE