lib/nvmf: use DSA to offload recv data digest crc32 in nvmf-TCP

Allow a DSA device to asynchronously offload crc32 calculation in nvmf-TCP.

This patch uses DSA to accelerate crc32 computation, so that the
I/O performance of TCP paths that use crc32 approaches the I/O
performance of TCP paths that do not.

An SLIST is used to minimize the performance drop: SLIST inserts and
removals update fewer pointers than their TAILQ equivalents.

To limit memory thrashing, we should reuse the same memory for new
PDUs as much as possible. Inserting a newly freed PDU at the head of
the list is therefore better: the next allocation hands back the most
recently used, likely still cache-warm, PDU.
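
As an illustration, here is a minimal sketch of this free-list pattern
(the struct and helper names are illustrative, not the exact SPDK
symbols):

/* Sketch (not part of the diff): head-insertion SLIST free list. */
#include <stddef.h>
#include <sys/queue.h>

struct pdu {
	SLIST_ENTRY(pdu) slist;
	/* ... payload fields ... */
};

SLIST_HEAD(pdu_list, pdu);

static struct pdu *
pdu_get(struct pdu_list *free_q)
{
	struct pdu *pdu = SLIST_FIRST(free_q);

	if (pdu == NULL) {
		return NULL;	/* pool exhausted */
	}
	SLIST_REMOVE_HEAD(free_q, slist);	/* O(1), one pointer update */
	return pdu;
}

static void
pdu_put(struct pdu_list *free_q, struct pdu *pdu)
{
	/* Head insertion: the most recently freed (cache-warm) PDU is
	 * handed out again first. */
	SLIST_INSERT_HEAD(free_q, pdu, slist);
}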

The resulting performance drop is within 1% of the TCP path without
crc32.

Signed-off-by: MengjinWu <mengjin.wu@intel.com>
Change-Id: I480eb8db25f0e730cb198ca5ec19dbe3b4d38440
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11708
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: GangCao <gang.cao@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
MengjinWu 2022-07-21 02:54:58 +00:00, committed by Konrad Sztyber
parent b46cfdb6c9
commit 7fc2c0856e
3 changed files with 88 additions and 65 deletions


@@ -98,6 +98,7 @@ struct nvme_tcp_pdu {
void *req; /* data tied to a tcp request */
void *qpair;
SLIST_ENTRY(nvme_tcp_pdu) slist;
};
SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu,
sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov),


@@ -246,6 +246,7 @@ struct spdk_nvmf_tcp_qpair {
/* Queues to track the requests in all states */
TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_working_queue;
TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_free_queue;
SLIST_HEAD(, nvme_tcp_pdu) tcp_pdu_free_queue;
/* Number of requests in each state */
uint32_t state_cntr[TCP_REQUEST_NUM_STATES];
@@ -392,7 +393,7 @@ nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
struct spdk_nvmf_tcp_req *tcp_req;
tcp_req = TAILQ_FIRST(&tqpair->tcp_req_free_queue);
if (!tcp_req) {
if (spdk_unlikely(!tcp_req)) {
return NULL;
}
@@ -963,25 +964,25 @@ static void
pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu)
{
struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;
uint32_t crc32c;
int rc = 0;
/* Data Digest */
if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
/* Only support this limited case for the first step */
if (spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)
&& tqpair->group)) {
if (!spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
pdu->data_iovcnt, 0, data_crc32_accel_done, pdu)) {
rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
pdu->data_iovcnt, 0, data_crc32_accel_done, pdu);
if (spdk_likely(rc == 0)) {
return;
}
} else {
pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
}
crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
crc32c = crc32c ^ SPDK_CRC32C_XOR;
MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
data_crc32_accel_done(pdu, rc);
} else {
_tcp_write_pdu(pdu);
}
_tcp_write_pdu(pdu);
}
static void
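
The hunk above converts the digest computation to a submit-or-fallback
pattern: try the asynchronous accel path first, and if submission
fails, invoke the same completion callback with the error so it can
fall back to a software CRC. A condensed sketch of that flow follows;
accel_submit_crc32() and sw_calc_crc32() are illustrative stand-ins
for spdk_accel_submit_crc32cv() and nvme_tcp_pdu_calc_data_digest():

/* Sketch (not part of the diff): submit-or-fallback crc32 flow. */
#include <stdint.h>

struct pdu_ctx {
	uint32_t data_digest_crc32;
	/* ... iovecs describing the payload ... */
};

typedef void (*crc32_cb)(void *cb_arg, int status);

int accel_submit_crc32(struct pdu_ctx *pdu, crc32_cb cb);	/* async, illustrative */
uint32_t sw_calc_crc32(struct pdu_ctx *pdu);			/* sync, illustrative */

static void
crc32_done(void *cb_arg, int status)
{
	struct pdu_ctx *pdu = cb_arg;

	if (status != 0) {
		/* Async offload failed: fall back to a software CRC. */
		pdu->data_digest_crc32 = sw_calc_crc32(pdu);
	}
	/* ... continue: finalize the digest, then write or verify it ... */
}

static void
compute_data_crc32(struct pdu_ctx *pdu)
{
	int rc = accel_submit_crc32(pdu, crc32_done);

	if (rc == 0) {
		return;		/* crc32_done() will run on completion */
	}
	crc32_done(pdu, rc);	/* submission failed: same callback, sync path */
}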
@@ -1073,9 +1074,10 @@ nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
return -1;
}
}
/* Add additional 2 members, which will be used for mgmt_pdu and pdu_in_progress owned by the tqpair */
tqpair->pdus = spdk_dma_zmalloc((tqpair->resource_count + 2) * sizeof(*tqpair->pdus), 0x1000, NULL);
/* prepare memory space for receiving pdus and tcp_req */
/* Add an additional member, which will be used for the mgmt_pdu owned by the tqpair */
tqpair->pdus = spdk_dma_zmalloc((2 * tqpair->resource_count + 1) * sizeof(*tqpair->pdus), 0x1000,
NULL);
if (!tqpair->pdus) {
SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair);
return -1;
@@ -1107,9 +1109,17 @@ nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++;
}
for (; i < 2 * tqpair->resource_count; i++) {
struct nvme_tcp_pdu *pdu = &tqpair->pdus[i];
pdu->qpair = tqpair;
SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist);
}
tqpair->mgmt_pdu = &tqpair->pdus[i];
tqpair->mgmt_pdu->qpair = tqpair;
tqpair->pdu_in_progress = &tqpair->pdus[i + 1];
tqpair->pdu_in_progress = SLIST_FIRST(&tqpair->tcp_pdu_free_queue);
SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist);
tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
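
For clarity, the layout of the enlarged PDU pool can be written out as
index ranges; the summary below is derived from this hunk and matches
the unit-test expectations at the end of this commit:

/* Sketch (not part of the diff): pool layout for resource_count == N,
 * i.e. 2 * N + 1 PDUs in total.
 *
 *   pdus[0 .. N-1]    one PDU per tcp_req (command/response PDUs)
 *   pdus[N .. 2N-1]   head-inserted into tcp_pdu_free_queue
 *   pdus[2N]          mgmt_pdu owned by the tqpair
 *
 * pdu_in_progress is then popped from the head of the free queue;
 * because entries were head-inserted in index order, that is
 * pdus[2N - 1]. */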
@@ -1131,6 +1141,7 @@ nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
/* Initialise request state queues of the qpair */
TAILQ_INIT(&tqpair->tcp_req_free_queue);
TAILQ_INIT(&tqpair->tcp_req_working_queue);
SLIST_INIT(&tqpair->tcp_pdu_free_queue);
tqpair->host_hdgst_enable = true;
tqpair->host_ddgst_enable = true;
@@ -1426,6 +1437,9 @@ nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
/* When leaving the await req state, move the qpair to the main list */
TAILQ_REMOVE(&tqpair->group->await_req, tqpair, link);
TAILQ_INSERT_TAIL(&tqpair->group->qpairs, tqpair, link);
} else if (state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link);
TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link);
}
SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv state=%d\n", tqpair, state);
@@ -1433,25 +1447,6 @@ nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
spdk_trace_record(TRACE_TCP_QP_RCV_STATE_CHANGE, tqpair->qpair.qid, 0, (uintptr_t)tqpair,
tqpair->recv_state);
switch (state) {
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
break;
case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link);
TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link);
break;
case NVME_TCP_PDU_RECV_STATE_ERROR:
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
memset(tqpair->pdu_in_progress, 0, sizeof(*(tqpair->pdu_in_progress)));
break;
default:
SPDK_ERRLOG("The state(%d) is invalid\n", state);
abort();
break;
}
}
static int
@@ -1571,8 +1566,6 @@ nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
goto err;
}
nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
rsp = &tcp_req->req.rsp->nvme_cpl;
if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) {
nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
@@ -1771,8 +1764,6 @@ nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
tcp_req->h2c_offset += pdu->data_len;
nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
/* Wait for all of the data to arrive AND for the initial R2T PDU send to be
* acknowledged before moving on. */
if (tcp_req->h2c_offset == tcp_req->req.length &&
@@ -1836,10 +1827,10 @@ nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
}
static void
_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
struct spdk_nvmf_tcp_transport *ttransport)
_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
struct nvme_tcp_pdu *pdu = tqpair->pdu_in_progress;
struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
struct spdk_nvmf_tcp_transport, transport);
switch (pdu->hdr.common.pdu_type) {
case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
@@ -1855,40 +1846,60 @@ _nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
default:
/* The code should not go to here */
SPDK_ERRLOG("The code should not go to here\n");
SPDK_ERRLOG("ERROR pdu type %d\n", pdu->hdr.common.pdu_type);
break;
}
SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist);
}
static void
nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
struct spdk_nvmf_tcp_transport *ttransport)
data_crc32_calc_done(void *cb_arg, int status)
{
int rc = 0;
struct nvme_tcp_pdu *pdu;
uint32_t crc32c;
struct nvme_tcp_pdu *pdu = cb_arg;
struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;
struct spdk_nvmf_tcp_req *tcp_req;
struct spdk_nvme_cpl *rsp;
assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
pdu = tqpair->pdu_in_progress;
/* The async crc32 calculation failed; fall back to direct calculation to check */
if (spdk_unlikely(status)) {
SPDK_ERRLOG("Data digest on tqpair=(%p) with pdu=%p failed to be calculated asynchronously\n",
tqpair, pdu);
pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
}
pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
if (!MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32)) {
SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
tcp_req = pdu->req;
assert(tcp_req != NULL);
rsp = &tcp_req->req.rsp->nvme_cpl;
rsp->status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
}
_nvmf_tcp_pdu_payload_handle(tqpair, pdu);
}
static void
nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
int rc = 0;
assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
tqpair->pdu_in_progress = NULL;
nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
SPDK_DEBUGLOG(nvmf_tcp, "enter\n");
/* check the data digest if needed */
if (pdu->ddgst_enable) {
crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
crc32c = crc32c ^ SPDK_CRC32C_XOR;
rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
if (rc == 0) {
SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
tcp_req = pdu->req;
assert(tcp_req != NULL);
rsp = &tcp_req->req.rsp->nvme_cpl;
rsp->status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
if (!pdu->dif_ctx && tqpair->group && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)) {
rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
pdu->data_iovcnt, 0, data_crc32_calc_done, pdu);
if (spdk_likely(rc == 0)) {
return;
}
} else {
pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
}
data_crc32_calc_done(pdu, rc);
} else {
_nvmf_tcp_pdu_payload_handle(tqpair, pdu);
}
_nvmf_tcp_pdu_payload_handle(tqpair, ttransport);
}
static void
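
The verification in data_crc32_calc_done() above relies on the CRC32C
finalization step: the accel framework returns the raw CRC, so the
code XORs in SPDK_CRC32C_XOR (the final one's complement) before
comparing against the four digest bytes from the wire. A minimal
standalone sketch of that comparison follows; the helper name is
illustrative, and the real code uses MATCH_DIGEST_WORD:

/* Sketch (not part of the diff): finalize and compare a data digest. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define CRC32C_XOR 0xffffffffUL	/* same value as SPDK_CRC32C_XOR */

static bool
data_digest_matches(const uint8_t wire_digest[4], uint32_t raw_crc32c)
{
	uint32_t expected = (uint32_t)(raw_crc32c ^ CRC32C_XOR);	/* finalize */
	uint32_t received;

	/* The digest is carried little-endian on the wire; this sketch
	 * assumes a little-endian host, whereas MAKE/MATCH_DIGEST_WORD
	 * handle byte order explicitly in the real code. */
	memcpy(&received, wire_digest, sizeof(received));
	return received == expected;
}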
@@ -2028,7 +2039,7 @@ nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
pdu = tqpair->pdu_in_progress;
assert(pdu);
if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
@@ -2141,9 +2152,20 @@ nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
pdu = tqpair->pdu_in_progress;
assert(pdu || tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
switch (tqpair->recv_state) {
/* Wait for the common header */
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
if (!pdu) {
pdu = SLIST_FIRST(&tqpair->tcp_pdu_free_queue);
if (spdk_unlikely(!pdu)) {
return NVME_TCP_PDU_IN_PROGRESS;
}
SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist);
tqpair->pdu_in_progress = pdu;
}
memset(pdu, 0, offsetof(struct nvme_tcp_pdu, qpair));
/* FALLTHROUGH */
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
return rc;
@@ -2225,7 +2247,7 @@ nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
}
/* All of this PDU has now been read from the socket. */
nvmf_tcp_pdu_payload_handle(tqpair, ttransport);
nvmf_tcp_pdu_payload_handle(tqpair, pdu);
break;
case NVME_TCP_PDU_RECV_STATE_ERROR:
if (!spdk_sock_is_connected(tqpair->sock)) {
@@ -2233,8 +2255,8 @@ nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
}
break;
default:
assert(0);
SPDK_ERRLOG("code should not come to here");
SPDK_ERRLOG("The state(%d) is invalid\n", tqpair->recv_state);
abort();
break;
}
} while (tqpair->recv_state != prev_state);


@@ -781,9 +781,9 @@ test_nvmf_tcp_qpair_init_mem_resource(void)
CU_ASSERT(tqpair->reqs[127].req.cmd == (void *)&tqpair->reqs[127].cmd);
CU_ASSERT(tqpair->reqs[127].state == TCP_REQUEST_STATE_FREE);
CU_ASSERT(tqpair->state_cntr[TCP_REQUEST_STATE_FREE] == SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH);
CU_ASSERT(tqpair->mgmt_pdu == &tqpair->pdus[SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH]);
CU_ASSERT(tqpair->mgmt_pdu == &tqpair->pdus[2 * SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH]);
CU_ASSERT(tqpair->mgmt_pdu->qpair == tqpair);
CU_ASSERT(tqpair->pdu_in_progress == &tqpair->pdus[SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH + 1]);
CU_ASSERT(tqpair->pdu_in_progress == &tqpair->pdus[2 * SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH - 1]);
CU_ASSERT(tqpair->recv_buf_size == (4096 + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR);