From 9db2571d321c95efa68264236f949f5f1cd7d644 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Thu, 17 Mar 2022 17:33:50 +0900 Subject: [PATCH] nvmf/rdma: Split fill_wr_sgl() between DIF is enabled or not Extract the code for DIF from nvmf_rdma_fill_wr_sgl() into nvmf_rdma_fill_wr_sgl_with_dif(). Then clean up nvmf_rdma_request_fill_iovs() and nvmf_rdma_request_fill_iovs_multi_sgl(). Additionally, this patch has a bug fix. nvmf_rdma_fill_wr_sgl_with_dif() returned false if spdk_rdma_get_translation() failed. However, the type of return value of nvmf_rdma_fill_wr_sgl_with_dif() is not bool but int. The boolean false is 0 in integer. Hence in this case, nvmf_rdma_fill_wr_sgl_with_dif() returned 0 even if it failed. Change nvmf_rdma_fill_wr_sgl_with_dif() to return rc as is if spdk_rdma_get_translation() returns non-zero rc. Signed-off-by: Shuhei Matsumoto Change-Id: I71cc186458bfe8863964ab68e2d014c495312cd3 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11965 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: fengchunsong Reviewed-by: Jim Harris Reviewed-by: Aleksey Marchuk --- lib/nvmf/rdma.c | 179 +++++++++++++++++++++++++++++------------------- 1 file changed, 109 insertions(+), 70 deletions(-) diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index 9a67a8be9..f3605ceae 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -1428,94 +1428,123 @@ nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup, struct spdk_nvmf_rdma_device *device, struct spdk_nvmf_rdma_request *rdma_req, struct ibv_send_wr *wr, - uint32_t total_length, - uint32_t num_extra_wrs) + uint32_t total_length) { struct spdk_rdma_memory_translation mem_translation; - struct spdk_dif_ctx *dif_ctx = NULL; struct ibv_sge *sg_ele; struct iovec *iov; - uint32_t remaining_data_block = 0; uint32_t lkey, remaining; int rc; - if (spdk_unlikely(rdma_req->req.dif_enabled)) { - dif_ctx = &rdma_req->req.dif.dif_ctx; - remaining_data_block = dif_ctx->block_size - dif_ctx->md_size; + wr->num_sge = 0; + + while (total_length && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) { + iov = &rdma_req->req.iov[rdma_req->iovpos]; + rc = spdk_rdma_get_translation(device->map, iov->iov_base, iov->iov_len, &mem_translation); + if (spdk_unlikely(rc)) { + return rc; + } + + lkey = spdk_rdma_memory_translation_get_lkey(&mem_translation); + sg_ele = &wr->sg_list[wr->num_sge]; + remaining = spdk_min((uint32_t)iov->iov_len - rdma_req->offset, total_length); + + sg_ele->lkey = lkey; + sg_ele->addr = (uintptr_t)iov->iov_base + rdma_req->offset; + sg_ele->length = remaining; + SPDK_DEBUGLOG(rdma, "sge[%d] %p addr 0x%"PRIx64", len %u\n", wr->num_sge, sg_ele, sg_ele->addr, + sg_ele->length); + rdma_req->offset += sg_ele->length; + total_length -= sg_ele->length; + wr->num_sge++; + + if (rdma_req->offset == iov->iov_len) { + rdma_req->offset = 0; + rdma_req->iovpos++; + } } + if (total_length) { + SPDK_ERRLOG("Not enough SG entries to hold data buffer\n"); + return -EINVAL; + } + + return 0; +} + +static int +nvmf_rdma_fill_wr_sgl_with_dif(struct spdk_nvmf_rdma_poll_group *rgroup, + struct spdk_nvmf_rdma_device *device, + struct spdk_nvmf_rdma_request *rdma_req, + struct ibv_send_wr *wr, + uint32_t total_length, + uint32_t num_extra_wrs) +{ + struct spdk_rdma_memory_translation mem_translation; + struct spdk_dif_ctx *dif_ctx = &rdma_req->req.dif.dif_ctx; + struct ibv_sge *sg_ele; + struct iovec *iov; + uint32_t lkey, remaining; + uint32_t remaining_data_block, data_block_size, md_size; + uint32_t sge_len; + int rc; + + data_block_size = dif_ctx->block_size - dif_ctx->md_size; + md_size = dif_ctx->md_size; + remaining_data_block = data_block_size; + wr->num_sge = 0; while (total_length && (num_extra_wrs || wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES)) { iov = &rdma_req->req.iov[rdma_req->iovpos]; rc = spdk_rdma_get_translation(device->map, iov->iov_base, iov->iov_len, &mem_translation); if (spdk_unlikely(rc)) { - return false; + return rc; } lkey = spdk_rdma_memory_translation_get_lkey(&mem_translation); sg_ele = &wr->sg_list[wr->num_sge]; remaining = spdk_min((uint32_t)iov->iov_len - rdma_req->offset, total_length); - if (spdk_likely(!dif_ctx)) { + while (remaining) { + if (wr->num_sge >= SPDK_NVMF_MAX_SGL_ENTRIES) { + if (num_extra_wrs > 0 && wr->next) { + wr = wr->next; + wr->num_sge = 0; + sg_ele = &wr->sg_list[wr->num_sge]; + num_extra_wrs--; + } else { + break; + } + } sg_ele->lkey = lkey; - sg_ele->addr = (uintptr_t)iov->iov_base + rdma_req->offset; - sg_ele->length = remaining; - SPDK_DEBUGLOG(rdma, "sge[%d] %p addr 0x%"PRIx64", len %u\n", wr->num_sge, sg_ele, sg_ele->addr, - sg_ele->length); - rdma_req->offset += sg_ele->length; - total_length -= sg_ele->length; + sg_ele->addr = (uintptr_t)((char *)iov->iov_base + rdma_req->offset); + sge_len = spdk_min(remaining, remaining_data_block); + sg_ele->length = sge_len; + SPDK_DEBUGLOG(rdma, "sge[%d] %p addr 0x%"PRIx64", len %u\n", wr->num_sge, sg_ele, + sg_ele->addr, sg_ele->length); + remaining -= sge_len; + remaining_data_block -= sge_len; + rdma_req->offset += sge_len; + total_length -= sge_len; + + sg_ele++; wr->num_sge++; - if (rdma_req->offset == iov->iov_len) { - rdma_req->offset = 0; - rdma_req->iovpos++; + if (remaining_data_block == 0) { + /* skip metadata */ + rdma_req->offset += md_size; + total_length -= md_size; + /* Metadata that do not fit this IO buffer will be included in the next IO buffer */ + remaining -= spdk_min(remaining, md_size); + remaining_data_block = data_block_size; } - } else { - uint32_t data_block_size = dif_ctx->block_size - dif_ctx->md_size; - uint32_t md_size = dif_ctx->md_size; - uint32_t sge_len; - while (remaining) { - if (wr->num_sge >= SPDK_NVMF_MAX_SGL_ENTRIES) { - if (num_extra_wrs > 0 && wr->next) { - wr = wr->next; - wr->num_sge = 0; - sg_ele = &wr->sg_list[wr->num_sge]; - num_extra_wrs--; - } else { - break; - } - } - sg_ele->lkey = lkey; - sg_ele->addr = (uintptr_t)((char *)iov->iov_base + rdma_req->offset); - sge_len = spdk_min(remaining, remaining_data_block); - sg_ele->length = sge_len; - SPDK_DEBUGLOG(rdma, "sge[%d] %p addr 0x%"PRIx64", len %u\n", wr->num_sge, sg_ele, sg_ele->addr, - sg_ele->length); - remaining -= sge_len; - remaining_data_block -= sge_len; - rdma_req->offset += sge_len; - total_length -= sge_len; - - sg_ele++; - wr->num_sge++; - - if (remaining_data_block == 0) { - /* skip metadata */ - rdma_req->offset += md_size; - total_length -= md_size; - /* Metadata that do not fit this IO buffer will be included in the next IO buffer */ - remaining -= spdk_min(remaining, md_size); - remaining_data_block = data_block_size; - } - - if (remaining == 0) { - /* By subtracting the size of the last IOV from the offset, we ensure that we skip - the remaining metadata bits at the beginning of the next buffer */ - rdma_req->offset -= spdk_min(iov->iov_len, rdma_req->offset); - rdma_req->iovpos++; - } + if (remaining == 0) { + /* By subtracting the size of the last IOV from the offset, we ensure that we skip + the remaining metadata bits at the beginning of the next buffer */ + rdma_req->offset -= spdk_min(iov->iov_len, rdma_req->offset); + rdma_req->iovpos++; } } } @@ -1587,15 +1616,20 @@ nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport, goto err_exit; } } - } - rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length, num_wrs - 1); - if (spdk_unlikely(rc != 0)) { - goto err_exit; - } + rc = nvmf_rdma_fill_wr_sgl_with_dif(rgroup, device, rdma_req, wr, length, num_wrs - 1); + if (spdk_unlikely(rc != 0)) { + goto err_exit; + } - if (spdk_unlikely(num_wrs > 1)) { - nvmf_rdma_update_remote_addr(rdma_req, num_wrs); + if (num_wrs > 1) { + nvmf_rdma_update_remote_addr(rdma_req, num_wrs); + } + } else { + rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length); + if (spdk_unlikely(rc != 0)) { + goto err_exit; + } } /* set the number of outstanding data WRs for this request. */ @@ -1680,7 +1714,12 @@ nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtranspor goto err_exit; } - rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i], 0); + if (spdk_likely(!req->dif_enabled)) { + rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i]); + } else { + rc = nvmf_rdma_fill_wr_sgl_with_dif(rgroup, device, rdma_req, current_wr, + lengths[i], 0); + } if (rc != 0) { rc = -ENOMEM; goto err_exit;