nvme/tcp: Fix tcp_req->datao calculation issue.
When data digest is enabled for an NVMe TCP qpair, we can use accel_fw to calculate the data crc32c. If multiple C2H PDUs arrive, we may use both the CPU directly and the accel_fw framework to calculate the checksum. In that case the datao comparison will not match, because we do not update "datao" in PDU arrival order. For example, suppose we receive 4 PDUs, named A, B, C, D: offset data_len (in bytes) A: 0 8192 B: 8192 4096 C: 12288 8192 D: 20480 4096. While receiving a PDU, we want execution to continue even if we use the offloading engine in accel_fw. In this situation, if PDU C is offloaded to accel_fw, our logic continues receiving PDU D. According to the logic in our code, this time we use the CPU to calculate the crc32c (because there is only one active PDU receiving data). We then find that the expected data offset is still 12288, because "datao" in tcp_req is only updated after nvme_tcp_c2h_data_payload_handle is called. So when we enter nvme_tcp_c2h_data_hdr_handle, the expected datao value does not match the data offset carried in PDU D. The solution is to add a new variable "expected_datao" in tcp_req and use it for the comparison, because we want to comply with the TP8000 spec and perform the offset check. We still need "datao" to track whether we have received all of the data, so we cannot reuse the "datao" variable for this early check; otherwise we would release the tcp_req structure too early and cause another bug. PS: This bug was not found earlier because previously the SW path in accel_fw calculated the crc32c directly and invoked the user callback. Now we use a list and a poller to handle it, which triggers this issue. It would be much easier to trigger this issue with a real hardware engine.
Fixes #2098 Signed-off-by: Ziye Yang <ziye.yang@intel.com> Change-Id: I10f5938a6342028d08d90820b2c14e4260134d77 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9612 Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: GangCao <gang.cao@intel.com> Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
parent
0a3383eec2
commit
cee2db52bf
@ -129,6 +129,7 @@ struct nvme_tcp_req {
|
||||
uint16_t cid;
|
||||
uint16_t ttag;
|
||||
uint32_t datao;
|
||||
uint32_t expected_datao;
|
||||
uint32_t r2tl_remain;
|
||||
uint32_t active_r2ts;
|
||||
/* Used to hold a value received from subsequent R2T while we are still
|
||||
@ -204,6 +205,7 @@ nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair)
|
||||
tcp_req->state = NVME_TCP_REQ_ACTIVE;
|
||||
TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link);
|
||||
tcp_req->datao = 0;
|
||||
tcp_req->expected_datao = 0;
|
||||
tcp_req->req = NULL;
|
||||
tcp_req->in_capsule_data = false;
|
||||
tcp_req->pdu_in_use = false;
|
||||
@ -1120,9 +1122,12 @@ nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair,
|
||||
|
||||
SPDK_DEBUGLOG(nvme, "enter\n");
|
||||
|
||||
tcp_req = pdu->req;
|
||||
/* Increase the expected data offset */
|
||||
tcp_req->expected_datao += pdu->data_len;
|
||||
|
||||
/* check data digest if need */
|
||||
if (pdu->ddgst_enable) {
|
||||
tcp_req = pdu->req;
|
||||
tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
|
||||
/* Only support this limited case for the first step */
|
||||
if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) &&
|
||||
@ -1329,8 +1334,8 @@ nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu
|
||||
|
||||
}
|
||||
|
||||
SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): datao=%u, payload_size=%u\n",
|
||||
tcp_req, tqpair, tcp_req->datao, tcp_req->req->payload_size);
|
||||
SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n",
|
||||
tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size);
|
||||
|
||||
if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) &&
|
||||
!(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) {
|
||||
@ -1347,9 +1352,9 @@ nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (tcp_req->datao != c2h_data->datao) {
|
||||
SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != datao(%u) in tcp_req\n",
|
||||
tcp_req, c2h_data->datao, tcp_req->datao);
|
||||
if (tcp_req->expected_datao != c2h_data->datao) {
|
||||
SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != expected datao(%u) in tcp_req\n",
|
||||
tcp_req, c2h_data->datao, tcp_req->expected_datao);
|
||||
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
|
||||
error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
|
||||
goto end;
|
||||
|
@ -1338,6 +1338,7 @@ test_nvme_tcp_pdu_payload_handle(void)
|
||||
tcp_req.state = NVME_TCP_REQ_ACTIVE;
|
||||
tcp_req.ordering.bits.send_ack = 1;
|
||||
|
||||
recv_pdu.req = &tcp_req;
|
||||
nvme_tcp_pdu_payload_handle(&tqpair, &reaped);
|
||||
CU_ASSERT(tqpair.recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
|
||||
CU_ASSERT(tcp_req.rsp.status.p == 0);
|
||||
@ -1351,6 +1352,7 @@ test_nvme_tcp_pdu_payload_handle(void)
|
||||
recv_pdu.hdr.term_req.fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
|
||||
tqpair.recv_state = NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD;
|
||||
|
||||
recv_pdu.req = &tcp_req;
|
||||
nvme_tcp_pdu_payload_handle(&tqpair, &reaped);
|
||||
CU_ASSERT(tqpair.recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user