From 48a547fd82c153d290b8908de2ca5ad81a2086a3 Mon Sep 17 00:00:00 2001 From: Ben Walker Date: Thu, 9 Jan 2020 12:20:43 -0700 Subject: [PATCH] nvmf/tcp: Wait for R2T send ack before processing H2C Previously, the R2T was sent and if an H2C arrived prior to seeing the R2T ack, it was processed anyway. Serialize this process. In practice, if the H2C arrives with a correctly functioning initiator, that means the R2T already made it to the initiator. But because the PDU hasn't been released yet, immediately processing the PDU requires an extra PDU associated with the request. Basically, making this change halves the worst-case number of PDUs required per connection. In the current sock layer implementations, it's not actually possible for the R2T send ack to occur after that H2C arrives. But with the upcoming addition of MSG_ZEROCOPY and other sock implementations, it's best to fix this now. Change-Id: Ifefaf48fcf2ff1dcc75e1686bbb9229b7ae3c219 Signed-off-by: Ben Walker Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/479906 Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- lib/nvmf/tcp.c | 85 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 16 deletions(-) diff --git a/lib/nvmf/tcp.c b/lib/nvmf/tcp.c index ce2515427..991734582 100644 --- a/lib/nvmf/tcp.c +++ b/lib/nvmf/tcp.c @@ -71,6 +71,9 @@ enum spdk_nvmf_tcp_req_state { /* The request is currently transferring data from the host to the controller. */ TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + /* The request is waiting for the R2T send acknowledgement. */ + TCP_REQUEST_STATE_AWAITING_R2T_ACK, + /* The request is ready to execute at the block device */ TCP_REQUEST_STATE_READY_TO_EXECUTE, @@ -117,6 +120,7 @@ static const char *spdk_nvmf_tcp_term_req_fes_str[] = { #define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9) #define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA) #define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB) +#define TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC) SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP) { @@ -157,6 +161,9 @@ SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP) spdk_trace_register_description("TCP_READ_DONE", TRACE_TCP_READ_FROM_SOCKET_DONE, OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK", + TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); } struct spdk_nvmf_tcp_req { @@ -393,6 +400,7 @@ spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair) spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER); spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING); spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK); } static void @@ -750,6 +758,8 @@ spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair, uint32_t mapped_length = 0; ssize_t rc; + assert(&tqpair->pdu_in_progress != pdu); + hlen = pdu->hdr->common.hlen; enable_digest = 1; if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP || @@ -1220,6 +1230,33 @@ err: spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); } +static int +nvmf_tcp_find_req_in_state(struct spdk_nvmf_tcp_qpair *tqpair, + enum spdk_nvmf_tcp_req_state state, + uint16_t cid, uint16_t tag, + struct spdk_nvmf_tcp_req **req) +{ + struct spdk_nvmf_tcp_req *tcp_req = NULL; + + TAILQ_FOREACH(tcp_req, &tqpair->state_queue[state], state_link) { + if (tcp_req->req.cmd->nvme_cmd.cid != cid) { + continue; + } + + if (tcp_req->ttag == tag) { + *req = tcp_req; + return 0; + } + + *req = NULL; + return -1; + } + + /* Didn't find it, but not an error */ + *req = NULL; + return 0; +} + static void spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, struct spdk_nvmf_tcp_qpair *tqpair, @@ -1229,29 +1266,24 @@ spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, uint32_t error_offset = 0; enum spdk_nvme_tcp_term_req_fes fes = 0; struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; - bool ttag_offset_error = false; + int rc; h2c_data = &pdu->hdr->h2c_data; SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n", tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag); - /* According to the information in the pdu to find the req */ - TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER], - state_link) { - if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) { - break; - } - - if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) { - ttag_offset_error = true; - } + rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + h2c_data->cccid, h2c_data->ttag, &tcp_req); + if (rc == 0 && tcp_req == NULL) { + rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK, h2c_data->cccid, + h2c_data->ttag, &tcp_req); } if (!tcp_req) { SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair); fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; - if (!ttag_offset_error) { + if (rc == 0) { error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid); } else { error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag); @@ -1341,8 +1373,19 @@ static void spdk_nvmf_tcp_r2t_complete(void *cb_arg) { struct spdk_nvmf_tcp_req *tcp_req = cb_arg; + struct spdk_nvmf_tcp_transport *ttransport; nvmf_tcp_req_pdu_fini(tcp_req); + + ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, + struct spdk_nvmf_tcp_transport, transport); + + spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + + if (tcp_req->h2c_offset == tcp_req->req.length) { + spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); + spdk_nvmf_tcp_req_process(ttransport, tcp_req); + } } static void @@ -1369,6 +1412,8 @@ spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, r2t->r2to = tcp_req->h2c_offset; r2t->r2tl = tcp_req->req.length; + spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK); + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); @@ -1391,7 +1436,10 @@ spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); - if (tcp_req->h2c_offset == tcp_req->req.length) { + /* Wait for all of the data to arrive AND for the initial R2T PDU send to be + * acknowledged before moving on. */ + if (tcp_req->h2c_offset == tcp_req->req.length && + tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) { spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); spdk_nvmf_tcp_req_process(ttransport, tcp_req); } @@ -2245,14 +2293,14 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, /* If data is transferring from host to controller, we need to do a transfer from the host. */ if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { - spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); if (tcp_req->req.data_from_pool) { - SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); - tcp_req->h2c_offset = 0; + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); } else { struct nvme_tcp_pdu *pdu; + spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + pdu = &tqpair->pdu_in_progress; SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); @@ -2266,7 +2314,12 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); break; + case TCP_REQUEST_STATE_AWAITING_R2T_ACK: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 0, 0, (uintptr_t)tcp_req, 0); + /* The R2T completion or the h2c data incoming will kick it out of this state. */ + break; case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, (uintptr_t)tcp_req, 0); /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE