nvmf/tcp: Wait for R2T send ack before processing H2C
Previously, the R2T was sent and if an H2C arrived prior to seeing the R2T ack, it was processed anyway. Serialize this process. In practice, if the H2C arrives with a correctly functioning initiator, that means the R2T already made it to the initiator. But because the PDU hasn't been released yet, immediately processing the PDU requires an extra PDU associated with the request. Basically, making this change halves the worst-case number of PDUs required per connection. In the current sock layer implementations, it's not actually possible for the R2T send ack to occur after that H2C arrives. But with the upcoming addition of MSG_ZEROCOPY and other sock implementations, it's best to fix this now. Change-Id: Ifefaf48fcf2ff1dcc75e1686bbb9229b7ae3c219 Signed-off-by: Ben Walker <benjamin.walker@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/479906 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
parent
033ef363a9
commit
48a547fd82
@ -71,6 +71,9 @@ enum spdk_nvmf_tcp_req_state {
|
|||||||
/* The request is currently transferring data from the host to the controller. */
|
/* The request is currently transferring data from the host to the controller. */
|
||||||
TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
|
TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
|
||||||
|
|
||||||
|
/* The request is waiting for the R2T send acknowledgement. */
|
||||||
|
TCP_REQUEST_STATE_AWAITING_R2T_ACK,
|
||||||
|
|
||||||
/* The request is ready to execute at the block device */
|
/* The request is ready to execute at the block device */
|
||||||
TCP_REQUEST_STATE_READY_TO_EXECUTE,
|
TCP_REQUEST_STATE_READY_TO_EXECUTE,
|
||||||
|
|
||||||
@ -117,6 +120,7 @@ static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
|
|||||||
#define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
|
#define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
|
||||||
#define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
|
#define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
|
||||||
#define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
|
#define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
|
||||||
|
#define TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC)
|
||||||
|
|
||||||
SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
|
SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
|
||||||
{
|
{
|
||||||
@ -157,6 +161,9 @@ SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
|
|||||||
spdk_trace_register_description("TCP_READ_DONE",
|
spdk_trace_register_description("TCP_READ_DONE",
|
||||||
TRACE_TCP_READ_FROM_SOCKET_DONE,
|
TRACE_TCP_READ_FROM_SOCKET_DONE,
|
||||||
OWNER_NONE, OBJECT_NONE, 0, 0, "");
|
OWNER_NONE, OBJECT_NONE, 0, 0, "");
|
||||||
|
spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
|
||||||
|
TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK,
|
||||||
|
OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
struct spdk_nvmf_tcp_req {
|
struct spdk_nvmf_tcp_req {
|
||||||
@ -393,6 +400,7 @@ spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
|
|||||||
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
|
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
|
||||||
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
|
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
|
||||||
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
|
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
|
||||||
|
spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -750,6 +758,8 @@ spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
|
|||||||
uint32_t mapped_length = 0;
|
uint32_t mapped_length = 0;
|
||||||
ssize_t rc;
|
ssize_t rc;
|
||||||
|
|
||||||
|
assert(&tqpair->pdu_in_progress != pdu);
|
||||||
|
|
||||||
hlen = pdu->hdr->common.hlen;
|
hlen = pdu->hdr->common.hlen;
|
||||||
enable_digest = 1;
|
enable_digest = 1;
|
||||||
if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
|
if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
|
||||||
@ -1220,6 +1230,33 @@ err:
|
|||||||
spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
|
spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
nvmf_tcp_find_req_in_state(struct spdk_nvmf_tcp_qpair *tqpair,
|
||||||
|
enum spdk_nvmf_tcp_req_state state,
|
||||||
|
uint16_t cid, uint16_t tag,
|
||||||
|
struct spdk_nvmf_tcp_req **req)
|
||||||
|
{
|
||||||
|
struct spdk_nvmf_tcp_req *tcp_req = NULL;
|
||||||
|
|
||||||
|
TAILQ_FOREACH(tcp_req, &tqpair->state_queue[state], state_link) {
|
||||||
|
if (tcp_req->req.cmd->nvme_cmd.cid != cid) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tcp_req->ttag == tag) {
|
||||||
|
*req = tcp_req;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
*req = NULL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Didn't find it, but not an error */
|
||||||
|
*req = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
|
spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
|
||||||
struct spdk_nvmf_tcp_qpair *tqpair,
|
struct spdk_nvmf_tcp_qpair *tqpair,
|
||||||
@ -1229,29 +1266,24 @@ spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
|
|||||||
uint32_t error_offset = 0;
|
uint32_t error_offset = 0;
|
||||||
enum spdk_nvme_tcp_term_req_fes fes = 0;
|
enum spdk_nvme_tcp_term_req_fes fes = 0;
|
||||||
struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
|
struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
|
||||||
bool ttag_offset_error = false;
|
int rc;
|
||||||
|
|
||||||
h2c_data = &pdu->hdr->h2c_data;
|
h2c_data = &pdu->hdr->h2c_data;
|
||||||
|
|
||||||
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
|
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
|
||||||
tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
|
tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
|
||||||
|
|
||||||
/* According to the information in the pdu to find the req */
|
rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
|
||||||
TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
|
h2c_data->cccid, h2c_data->ttag, &tcp_req);
|
||||||
state_link) {
|
if (rc == 0 && tcp_req == NULL) {
|
||||||
if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
|
rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK, h2c_data->cccid,
|
||||||
break;
|
h2c_data->ttag, &tcp_req);
|
||||||
}
|
|
||||||
|
|
||||||
if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
|
|
||||||
ttag_offset_error = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!tcp_req) {
|
if (!tcp_req) {
|
||||||
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
|
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
|
||||||
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
|
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
|
||||||
if (!ttag_offset_error) {
|
if (rc == 0) {
|
||||||
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
|
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
|
||||||
} else {
|
} else {
|
||||||
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
|
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
|
||||||
@ -1341,8 +1373,19 @@ static void
|
|||||||
spdk_nvmf_tcp_r2t_complete(void *cb_arg)
|
spdk_nvmf_tcp_r2t_complete(void *cb_arg)
|
||||||
{
|
{
|
||||||
struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
|
struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
|
||||||
|
struct spdk_nvmf_tcp_transport *ttransport;
|
||||||
|
|
||||||
nvmf_tcp_req_pdu_fini(tcp_req);
|
nvmf_tcp_req_pdu_fini(tcp_req);
|
||||||
|
|
||||||
|
ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
|
||||||
|
struct spdk_nvmf_tcp_transport, transport);
|
||||||
|
|
||||||
|
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
|
||||||
|
|
||||||
|
if (tcp_req->h2c_offset == tcp_req->req.length) {
|
||||||
|
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
|
||||||
|
spdk_nvmf_tcp_req_process(ttransport, tcp_req);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -1369,6 +1412,8 @@ spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
|
|||||||
r2t->r2to = tcp_req->h2c_offset;
|
r2t->r2to = tcp_req->h2c_offset;
|
||||||
r2t->r2tl = tcp_req->req.length;
|
r2t->r2tl = tcp_req->req.length;
|
||||||
|
|
||||||
|
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
|
||||||
|
|
||||||
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
|
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
|
||||||
"tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
|
"tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
|
||||||
tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
|
tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
|
||||||
@ -1391,7 +1436,10 @@ spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport
|
|||||||
|
|
||||||
spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
|
spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
|
||||||
|
|
||||||
if (tcp_req->h2c_offset == tcp_req->req.length) {
|
/* Wait for all of the data to arrive AND for the initial R2T PDU send to be
|
||||||
|
* acknowledged before moving on. */
|
||||||
|
if (tcp_req->h2c_offset == tcp_req->req.length &&
|
||||||
|
tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) {
|
||||||
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
|
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
|
||||||
spdk_nvmf_tcp_req_process(ttransport, tcp_req);
|
spdk_nvmf_tcp_req_process(ttransport, tcp_req);
|
||||||
}
|
}
|
||||||
@ -2245,14 +2293,14 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
|
|||||||
|
|
||||||
/* If data is transferring from host to controller, we need to do a transfer from the host. */
|
/* If data is transferring from host to controller, we need to do a transfer from the host. */
|
||||||
if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
|
if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
|
||||||
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
|
|
||||||
if (tcp_req->req.data_from_pool) {
|
if (tcp_req->req.data_from_pool) {
|
||||||
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
|
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
|
||||||
tcp_req->h2c_offset = 0;
|
|
||||||
spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
|
spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
|
||||||
} else {
|
} else {
|
||||||
struct nvme_tcp_pdu *pdu;
|
struct nvme_tcp_pdu *pdu;
|
||||||
|
|
||||||
|
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
|
||||||
|
|
||||||
pdu = &tqpair->pdu_in_progress;
|
pdu = &tqpair->pdu_in_progress;
|
||||||
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
|
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
|
||||||
tqpair);
|
tqpair);
|
||||||
@ -2266,7 +2314,12 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
|
|||||||
|
|
||||||
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
|
spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
|
||||||
break;
|
break;
|
||||||
|
case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
|
||||||
|
spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 0, 0, (uintptr_t)tcp_req, 0);
|
||||||
|
/* The R2T completion or the h2c data incoming will kick it out of this state. */
|
||||||
|
break;
|
||||||
case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
|
case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
|
||||||
|
|
||||||
spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
|
spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
|
||||||
(uintptr_t)tcp_req, 0);
|
(uintptr_t)tcp_req, 0);
|
||||||
/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
|
/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
|
||||||
|
Loading…
Reference in New Issue
Block a user