nvmf/tcp: Use a big buffer for PDU receiving
Purpose: Reduce the number of recv/readv system calls.

Method: Use one big receive buffer for socket reads. Although this introduces an additional buffer copy, the copy overhead is expected to be smaller than the overhead of frequent recv/readv system calls; the design trades one cost for the other.

Signed-off-by: Ziye Yang <ziye.yang@intel.com>
Change-Id: I9286fd9cec0b512cea8e3f2c335c5bf862b98573
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/464842
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent ea5ad0b286
commit d50736776c

lib/nvmf/tcp.c | 148
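
The core technique, as a minimal standalone sketch before the diff (not SPDK code; the struct and the names recv_buf_fill()/recv_buf_copy() are hypothetical): one large recv() fills a staging buffer, and later header/payload reads are served by memcpy() from it, so a single system call can feed several PDU-parsing steps at the cost of one extra copy.

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>

struct recv_buf {
	char		*buf;		/* backing storage */
	uint32_t	off;		/* read cursor */
	uint32_t	size;		/* capacity */
	uint32_t	remain_size;	/* unread bytes left in buf */
};

/* Refill with a single recv(); only legal once the buffer is drained. */
static ssize_t
recv_buf_fill(int fd, struct recv_buf *rb)
{
	ssize_t rc;

	assert(rb->off == 0 && rb->remain_size == 0);
	rc = recv(fd, rb->buf, rb->size, 0);
	if (rc > 0) {
		rb->remain_size = (uint32_t)rc;
	}
	return rc;	/* <0 error, 0 peer closed, >0 bytes buffered */
}

/* Copy out up to expected_size bytes; rewind the cursor once drained. */
static uint32_t
recv_buf_copy(struct recv_buf *rb, void *dst, uint32_t expected_size)
{
	uint32_t size = expected_size < rb->remain_size ? expected_size : rb->remain_size;

	memcpy(dst, rb->buf + rb->off, size);
	rb->off += size;
	rb->remain_size -= size;
	if (rb->remain_size == 0) {
		rb->off = 0;
	}
	return size;
}
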
@@ -55,6 +55,7 @@
 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM	64  /* Maximal c2h_data pdu number for ecah tqpair */
 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
+#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 4
 
 /* spdk nvmf related structure */
 enum spdk_nvmf_tcp_req_state {
@@ -198,6 +199,13 @@ struct spdk_nvmf_tcp_req {
 	TAILQ_ENTRY(spdk_nvmf_tcp_req)	state_link;
 };
 
+struct nvme_tcp_pdu_recv_buf {
+	char		*buf;
+	uint32_t	off;
+	uint32_t	size;
+	uint32_t	remain_size;
+};
+
 struct spdk_nvmf_tcp_qpair {
 	struct spdk_nvmf_qpair			qpair;
 	struct spdk_nvmf_tcp_poll_group		*group;
@@ -209,6 +217,7 @@ struct spdk_nvmf_tcp_qpair {
 	enum nvme_tcp_qpair_state		state;
 
 	struct nvme_tcp_pdu			pdu_in_progress;
+	struct nvme_tcp_pdu_recv_buf		pdu_recv_buf;
 
 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
@@ -500,6 +509,7 @@ spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
 	free(tqpair->reqs);
 	spdk_free(tqpair->buf);
 	spdk_free(tqpair->bufs);
+	free(tqpair->pdu_recv_buf.buf);
 	free(tqpair);
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
 }
@@ -982,6 +992,14 @@ spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16
 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
 		}
 
+		tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
+					     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+		tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
+		if (!tqpair->pdu_recv_buf.buf) {
+			SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
+				    tqpair->pdu_recv_buf.size);
+			return -1;
+		}
 	} else {
 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
 		if (!tqpair->reqs) {
@@ -1707,7 +1725,13 @@ spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
 
 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
+	if (!tqpair->host_hdgst_enable) {
+		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+	}
 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
+	if (!tqpair->host_ddgst_enable) {
+		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+	}
 
 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
@@ -1902,7 +1926,65 @@ nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
 	return rc;
 }
 
-#define MAX_NVME_TCP_PDU_LOOP_COUNT 32
+static int
+nvme_tcp_recv_buf_read(struct spdk_sock *sock, struct nvme_tcp_pdu_recv_buf *pdu_recv_buf)
+{
+	int rc;
+
+	assert(pdu_recv_buf->off == 0);
+	assert(pdu_recv_buf->remain_size == 0);
+	rc = nvme_tcp_read_data(sock, pdu_recv_buf->size,
+				pdu_recv_buf->buf);
+	if (rc < 0) {
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect sock=%p\n", sock);
+	} else if (rc > 0) {
+		pdu_recv_buf->remain_size = rc;
+		spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
+	}
+
+	return rc;
+}
+
+static uint32_t
+nvme_tcp_read_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
+				     uint32_t expected_size,
+				     char *dst)
+{
+	uint32_t size;
+
+	assert(pdu_recv_buf->remain_size > 0);
+	size = spdk_min(expected_size, pdu_recv_buf->remain_size);
+	memcpy(dst, (void *)pdu_recv_buf->buf + pdu_recv_buf->off, size);
+	pdu_recv_buf->off += size;
+	pdu_recv_buf->remain_size -= size;
+	if (spdk_unlikely(!pdu_recv_buf->remain_size)) {
+		pdu_recv_buf->off = 0;
+	}
+
+	return size;
+}
+
+static int
+nvme_tcp_read_payload_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
+		struct nvme_tcp_pdu *pdu)
+{
+	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
+	int iovcnt, i;
+	uint32_t size = 0;
+
+	assert(pdu_recv_buf->remain_size > 0);
+	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
+					     pdu->ddgst_enable, NULL);
+	assert(iovcnt >= 0);
+	for (i = 0; i < iovcnt; i++) {
+		if (!pdu_recv_buf->remain_size) {
+			break;
+		}
+
+		size += nvme_tcp_read_data_from_pdu_recv_buf(pdu_recv_buf, iov[i].iov_len, iov[i].iov_base);
+	}
+
+	return size;
+}
 
 static int
 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
@@ -1910,7 +1992,7 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 	int rc = 0;
 	struct nvme_tcp_pdu *pdu;
 	enum nvme_tcp_pdu_recv_state prev_state;
-	uint32_t data_len, current_pdu_num = 0;
+	uint32_t data_len;
 
 	/* The loop here is to allow for several back-to-back state changes. */
 	do {
@@ -1922,19 +2004,19 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 		/* Wait for the common header */
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
-			rc = nvme_tcp_read_data(tqpair->sock,
-						sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
-						(void *)&pdu->hdr->common + pdu->ch_valid_bytes);
-			if (rc < 0) {
-				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair);
-				return NVME_TCP_PDU_FATAL;
-			} else if (rc > 0) {
-				pdu->ch_valid_bytes += rc;
-				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
-				if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
-					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
-				}
-			}
+			if (!tqpair->pdu_recv_buf.remain_size) {
+				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
+				if (rc <= 0) {
+					return rc;
+				}
+			}
+			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
+					sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
+					(void *)&pdu->hdr->common + pdu->ch_valid_bytes);
+			pdu->ch_valid_bytes += rc;
+			if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
+				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
+			}
 
 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
 				return NVME_TCP_PDU_IN_PROGRESS;
@@ -1945,25 +2027,23 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 			break;
 		/* Wait for the pdu specific header */
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
-			rc = nvme_tcp_read_data(tqpair->sock,
-						pdu->psh_len - pdu->psh_valid_bytes,
-						(void *)&pdu->hdr->raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
-			if (rc < 0) {
-				return NVME_TCP_PDU_FATAL;
-			} else if (rc > 0) {
-				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE,
-						  0, rc, 0, 0);
-				pdu->psh_valid_bytes += rc;
-			}
+			if (!tqpair->pdu_recv_buf.remain_size) {
+				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
+				if (rc <= 0) {
+					return rc;
+				}
+			}
+
+			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
+					pdu->psh_len - pdu->psh_valid_bytes,
+					(void *)&pdu->hdr->raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
+			pdu->psh_valid_bytes += rc;
 			if (pdu->psh_valid_bytes < pdu->psh_len) {
 				return NVME_TCP_PDU_IN_PROGRESS;
 			}
 
 			/* All header(ch, psh, head digist) of this PDU has now been read from the socket. */
 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
-			if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
-				current_pdu_num++;
-			}
 			break;
 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
 			/* check whether the data is valid, if not we just return */
@@ -1979,11 +2059,18 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 				pdu->ddgst_enable = true;
 			}
 
-			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
-			if (rc < 0) {
-				return NVME_TCP_PDU_IN_PROGRESS;
-			}
-			pdu->readv_offset += rc;
+			if (tqpair->pdu_recv_buf.remain_size) {
+				rc = nvme_tcp_read_payload_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf, pdu);
+				pdu->readv_offset += rc;
+			}
+
+			if (pdu->readv_offset < data_len) {
+				rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
+				if (rc < 0) {
+					return NVME_TCP_PDU_IN_PROGRESS;
+				}
+				pdu->readv_offset += rc;
+			}
 
 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
@@ -1998,7 +2085,6 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 
 			/* All of this PDU has now been read from the socket. */
 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
-			current_pdu_num++;
 			break;
 		case NVME_TCP_PDU_RECV_STATE_ERROR:
 			/* Check whether the connection is closed. Each time, we only read 1 byte every time */
@@ -2012,7 +2098,7 @@ spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
 			SPDK_ERRLOG("code should not come to here");
 			break;
 		}
-	} while ((tqpair->recv_state != prev_state) && (current_pdu_num < MAX_NVME_TCP_PDU_LOOP_COUNT));
+	} while (tqpair->recv_state != prev_state);
 
 	return rc;
 }
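
For a concrete feel of the recv-buffer sizing in the diff, a worked example under assumed values (a 72-byte spdk_nvme_tcp_cmd PDU header = 8-byte common header + 64-byte SQE, a 4-byte CRC32C digest, and a target configured for 4 KiB of in-capsule data; the constant names mirror the diff, the numbers are illustrative, not taken from the commit):

#include <stdint.h>
#include <stdio.h>

#define RECV_BUF_SIZE_FACTOR	4	/* SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR */
#define DIGEST_LEN		4	/* SPDK_NVME_TCP_DIGEST_LEN, assumed */
#define TCP_CMD_HDR_LEN		72	/* sizeof(struct spdk_nvme_tcp_cmd), assumed */

int
main(void)
{
	uint32_t in_capsule_data_size = 4096;	/* assumed target config */

	/* Worst-case command PDU (header + HDGST + in-capsule data + DDGST),
	 * scaled by the factor so one recv() can stage several PDUs. */
	uint32_t size = (in_capsule_data_size + TCP_CMD_HDR_LEN + 2 * DIGEST_LEN) *
			RECV_BUF_SIZE_FACTOR;

	printf("recv buf at init: %u bytes\n", size);		/* 16704 */

	/* After ICReq negotiation, each digest the host left disabled gives
	 * back its scaled share, as in spdk_nvmf_tcp_icreq_handle(): */
	size -= DIGEST_LEN * RECV_BUF_SIZE_FACTOR;		/* no header digest */
	size -= DIGEST_LEN * RECV_BUF_SIZE_FACTOR;		/* no data digest */
	printf("after negotiation: %u bytes\n", size);		/* 16672 */
	return 0;
}

The buffer itself stays at its allocated size; only the negotiated read size shrinks, so disabled digests simply mean fewer bytes are requested per refill.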