diff --git a/lib/nvmf/tcp.c b/lib/nvmf/tcp.c
index 0914ad8f9..faac49b7e 100644
--- a/lib/nvmf/tcp.c
+++ b/lib/nvmf/tcp.c
@@ -55,6 +55,7 @@
 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximal c2h_data pdu number for ecah tqpair */
 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
+#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 4
 
 /* spdk nvmf related structure */
 enum spdk_nvmf_tcp_req_state {
@@ -198,6 +199,13 @@ struct spdk_nvmf_tcp_req {
 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
 };
 
+struct nvme_tcp_pdu_recv_buf {
+	char					*buf;
+	uint32_t				off;
+	uint32_t				size;
+	uint32_t				remain_size;
+};
+
 struct spdk_nvmf_tcp_qpair {
 	struct spdk_nvmf_qpair			qpair;
 	struct spdk_nvmf_tcp_poll_group		*group;
@@ -209,6 +217,7 @@ struct spdk_nvmf_tcp_qpair {
 	enum nvme_tcp_qpair_state		state;
 
 	struct nvme_tcp_pdu			pdu_in_progress;
+	struct nvme_tcp_pdu_recv_buf		pdu_recv_buf;
 
 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
@@ -500,6 +509,7 @@ spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
 	free(tqpair->reqs);
 	spdk_free(tqpair->buf);
 	spdk_free(tqpair->bufs);
+	free(tqpair->pdu_recv_buf.buf);
 	free(tqpair);
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
 }
@@ -982,6 +992,14 @@ spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16
 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
 		}
 
+		tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
+					     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+		tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
+		if (!tqpair->pdu_recv_buf.buf) {
+			SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
+				    tqpair->pdu_recv_buf.size);
+			return -1;
+		}
 	} else {
 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
 		if (!tqpair->reqs) {
@@ -1707,7 +1725,13 @@ spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
 
 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
+	if (!tqpair->host_hdgst_enable) {
+		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+	}
 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
+	if (!tqpair->host_ddgst_enable) {
+		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+	}
 
 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
@@ -1902,7 +1926,65 @@ nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
 	return rc;
 }
 
-#define MAX_NVME_TCP_PDU_LOOP_COUNT 32
+static int
+nvme_tcp_recv_buf_read(struct spdk_sock *sock, struct nvme_tcp_pdu_recv_buf *pdu_recv_buf)
+{
+	int rc;
+
+	assert(pdu_recv_buf->off == 0);
+	assert(pdu_recv_buf->remain_size == 0);
+	rc = nvme_tcp_read_data(sock, pdu_recv_buf->size,
+				pdu_recv_buf->buf);
+	if (rc < 0) {
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect sock=%p\n", sock);
+	} else if (rc > 0) {
+		pdu_recv_buf->remain_size = rc;
+		spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
+	}
+
+	return rc;
+}
+
+static uint32_t
+nvme_tcp_read_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
+				     uint32_t expected_size,
+				     char *dst)
+{
+	uint32_t size;
+
+	assert(pdu_recv_buf->remain_size > 0);
+	size = spdk_min(expected_size, pdu_recv_buf->remain_size);
+	memcpy(dst, (void *)pdu_recv_buf->buf + pdu_recv_buf->off, size);
+	pdu_recv_buf->off += size;
+	pdu_recv_buf->remain_size -= size;
+	if (spdk_unlikely(!pdu_recv_buf->remain_size)) {
+		pdu_recv_buf->off = 0;
+	}
+
+	return size;
+}
+
+static int
+nvme_tcp_read_payload_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
+		struct nvme_tcp_pdu *pdu)
+{
+	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
+	int iovcnt, i;
+	uint32_t size = 0;
+
+	assert(pdu_recv_buf->remain_size > 0);
+	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
+					     pdu->ddgst_enable, NULL);
+	assert(iovcnt >= 0);
+	for (i = 0; i < iovcnt; i++) {
+		if (!pdu_recv_buf->remain_size) {
+			break;
+		}
+		size += nvme_tcp_read_data_from_pdu_recv_buf(pdu_recv_buf, iov[i].iov_len, iov[i].iov_base);
+	}
+
+	return size;
+}
 
 static int
 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
@@ -1910,7 +1992,7 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 	int rc = 0;
 	struct nvme_tcp_pdu *pdu;
 	enum nvme_tcp_pdu_recv_state prev_state;
-	uint32_t data_len, current_pdu_num = 0;
+	uint32_t data_len;
 
 	/* The loop here is to allow for several back-to-back state changes. */
 	do {
@@ -1922,19 +2004,19 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 		/* Wait for the common header  */
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
-			rc = nvme_tcp_read_data(tqpair->sock,
-						sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
-						(void *)&pdu->hdr->common + pdu->ch_valid_bytes);
-			if (rc < 0) {
-				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair);
-				return NVME_TCP_PDU_FATAL;
-			} else if (rc > 0) {
-				pdu->ch_valid_bytes += rc;
-				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
-				if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
-					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
+			if (!tqpair->pdu_recv_buf.remain_size) {
+				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
+				if (rc <= 0) {
+					return rc;
 				}
 			}
+			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
+					sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
+					(void *)&pdu->hdr->common + pdu->ch_valid_bytes);
+			pdu->ch_valid_bytes += rc;
+			if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
+				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
+			}
 
 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
 				return NVME_TCP_PDU_IN_PROGRESS;
@@ -1945,25 +2027,23 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 			break;
 		/* Wait for the pdu specific header  */
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
-			rc = nvme_tcp_read_data(tqpair->sock,
-						pdu->psh_len - pdu->psh_valid_bytes,
-						(void *)&pdu->hdr->raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
-			if (rc < 0) {
-				return NVME_TCP_PDU_FATAL;
-			} else if (rc > 0) {
-				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE,
-						  0, rc, 0, 0);
-				pdu->psh_valid_bytes += rc;
+			if (!tqpair->pdu_recv_buf.remain_size) {
+				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
+				if (rc <= 0) {
+					return rc;
+				}
 			}
+
+			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
+					pdu->psh_len - pdu->psh_valid_bytes,
+					(void *)&pdu->hdr->raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
+			pdu->psh_valid_bytes += rc;
 			if (pdu->psh_valid_bytes < pdu->psh_len) {
 				return NVME_TCP_PDU_IN_PROGRESS;
 			}
 
 			/* All header(ch, psh, head digist) of this PDU has now been read from the socket. */
 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
-			if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
-				current_pdu_num++;
-			}
 			break;
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
 			/* check whether the data is valid, if not we just return */
@@ -1979,11 +2059,18 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 				pdu->ddgst_enable = true;
 			}
 
-			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
-			if (rc < 0) {
-				return NVME_TCP_PDU_IN_PROGRESS;
+			if (tqpair->pdu_recv_buf.remain_size) {
+				rc = nvme_tcp_read_payload_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf, pdu);
+				pdu->readv_offset += rc;
+			}
+
+			if (pdu->readv_offset < data_len) {
+				rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
+				if (rc < 0) {
+					return NVME_TCP_PDU_IN_PROGRESS;
+				}
+				pdu->readv_offset += rc;
 			}
-			pdu->readv_offset += rc;
 
 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
@@ -1998,7 +2085,6 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 
 			/* All of this PDU has now been read from the socket. */
 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
-			current_pdu_num++;
 			break;
 		case NVME_TCP_PDU_RECV_STATE_ERROR:
 			/* Check whether the connection is closed. Each time, we only read 1 byte every time */
@@ -2012,7 +2098,7 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 			SPDK_ERRLOG("code should not come to here");
 			break;
 		}
-	} while ((tqpair->recv_state != prev_state) && (current_pdu_num < MAX_NVME_TCP_PDU_LOOP_COUNT));
+	} while (tqpair->recv_state != prev_state);
 
 	return rc;
 }