From 6a77723eb4e7a1404279f40eba6054b1fc8cc569 Mon Sep 17 00:00:00 2001
From: Alexey Marchuk
Date: Thu, 3 Oct 2019 11:00:22 +0000
Subject: [PATCH] rdma: Use WR chaining when DIF is enabled

This patch adds the following:
1. Change the signature of nvmf_rdma_fill_wr_sge - pass ibv_send_wr **
   so that the caller's variable can be updated, and add a pointer to
   the number of extra WRs
2. Add a check on the number of requested WRs to nvmf_request_alloc_wrs
3. Add a function that updates the remote address offset of chained WRs

Change-Id: I26f6567211b3ebfdb4981a7499f6df25e32cbb3a
Signed-off-by: Alexey Marchuk
Signed-off-by: Sasha Kotchubievsky
Signed-off-by: Evgenii Kochetov
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/470475
Tested-by: SPDK CI Jenkins
Reviewed-by: Ben Walker
Reviewed-by: Shuhei Matsumoto
---
 lib/nvmf/rdma.c | 59 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 8 deletions(-)

diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c
index 67c081c7f..b16b1a752 100644
--- a/lib/nvmf/rdma.c
+++ b/lib/nvmf/rdma.c
@@ -1483,6 +1483,12 @@ nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport,
 	struct spdk_nvmf_rdma_request_data	*current_data_wr;
 	uint32_t				i;
 
+	if (num_sgl_descriptors > SPDK_NVMF_MAX_SGL_ENTRIES) {
+		SPDK_ERRLOG("Requested too many entries (%u), the limit is %u\n",
+			    num_sgl_descriptors, SPDK_NVMF_MAX_SGL_ENTRIES);
+		return -EINVAL;
+	}
+
 	if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) {
 		return -ENOMEM;
 	}
@@ -1513,6 +1519,25 @@ nvmf_rdma_setup_request(struct spdk_nvmf_rdma_request *rdma_req)
 	nvmf_rdma_setup_wr(wr, &rdma_req->rsp.wr, rdma_req->req.xfer);
 }
 
+static inline void
+nvmf_rdma_update_remote_addr(struct spdk_nvmf_rdma_request *rdma_req, uint32_t num_wrs)
+{
+	struct ibv_send_wr		*wr = &rdma_req->data.wr;
+	struct spdk_nvme_sgl_descriptor	*sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1;
+	uint32_t			i;
+	int				j;
+	uint64_t			remote_addr_offset = 0;
+
+	for (i = 0; i < num_wrs; ++i) {
+		wr->wr.rdma.rkey = sgl->keyed.key;
+		wr->wr.rdma.remote_addr = sgl->address + remote_addr_offset;
+		for (j = 0; j < wr->num_sge; ++j) {
+			remote_addr_offset += wr->sg_list[j].length;
+		}
+		wr = wr->next;
+	}
+}
+
 /* This function is used in the rare case that we have a buffer split over multiple memory regions. */
 static int
 nvmf_rdma_replace_buffer(struct spdk_nvmf_rdma_poll_group *rgroup, void **buf)
@@ -1568,10 +1593,12 @@ nvmf_rdma_get_lkey(struct spdk_nvmf_rdma_device *device, struct iovec *iov,
 
 static bool
 nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
-		      struct iovec *iov, struct ibv_send_wr *wr,
+		      struct iovec *iov, struct ibv_send_wr **_wr,
 		      uint32_t *_remaining_data_block, uint32_t *_offset,
+		      uint32_t *_num_extra_wrs,
 		      const struct spdk_dif_ctx *dif_ctx)
 {
+	struct ibv_send_wr *wr = *_wr;
 	struct ibv_sge	*sg_ele = &wr->sg_list[wr->num_sge];
 	uint32_t	lkey = 0;
 	uint32_t	remaining, data_block_size, md_size, sge_len;
@@ -1592,7 +1619,18 @@ nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
 		data_block_size = dif_ctx->block_size - dif_ctx->md_size;
 		md_size = dif_ctx->md_size;
 
-		while (remaining && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
+		while (remaining) {
+			if (wr->num_sge >= SPDK_NVMF_MAX_SGL_ENTRIES) {
+				if (*_num_extra_wrs > 0 && wr->next) {
+					*_wr = wr->next;
+					wr = *_wr;
+					wr->num_sge = 0;
+					sg_ele = &wr->sg_list[wr->num_sge];
+					(*_num_extra_wrs)--;
+				} else {
+					break;
+				}
+			}
 			sg_ele->lkey = lkey;
 			sg_ele->addr = (uintptr_t)((char *)iov->iov_base + *_offset);
 			sge_len = spdk_min(remaining, *_remaining_data_block);
@@ -1628,7 +1666,8 @@ nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
 		      struct spdk_nvmf_rdma_device *device,
 		      struct spdk_nvmf_rdma_request *rdma_req,
 		      struct ibv_send_wr *wr,
-		      uint32_t length)
+		      uint32_t length,
+		      uint32_t num_extra_wrs)
 {
 	struct spdk_nvmf_request *req = &rdma_req->req;
 	struct spdk_dif_ctx *dif_ctx = NULL;
@@ -1642,9 +1681,9 @@ nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
 
 	wr->num_sge = 0;
 
-	while (length && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
-		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, &req->iov[rdma_req->iovpos], wr,
-				     &remaining_data_block, &offset, dif_ctx))) {
+	while (length && (num_extra_wrs || wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES)) {
+		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, &req->iov[rdma_req->iovpos], &wr,
+				     &remaining_data_block, &offset, &num_extra_wrs, dif_ctx))) {
 			if (nvmf_rdma_replace_buffer(rgroup, &req->buffers[rdma_req->iovpos]) == -ENOMEM) {
 				return -ENOMEM;
 			}
@@ -1726,11 +1765,15 @@ spdk_nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,
 		}
 	}
 
-	rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length);
+	rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length, num_wrs - 1);
 	if (spdk_unlikely(rc != 0)) {
 		goto err_exit;
 	}
 
+	if (spdk_unlikely(num_wrs > 1)) {
+		nvmf_rdma_update_remote_addr(rdma_req, num_wrs);
+	}
+
 	/* set the number of outstanding data WRs for this request. */
 	rdma_req->num_outstanding_data_wr = num_wrs;
 
@@ -1808,7 +1851,7 @@ nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtranspor
 
 		current_wr->num_sge = 0;
 
-		rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i]);
+		rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i], 0);
 		if (rc != 0) {
 			rc = -ENOMEM;
 			goto err_exit;
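
A note on the motivation, for readers unfamiliar with the DIF path: the
sketch below is not part of the patch; it is a minimal, standalone
illustration of why DIF can push a request past one WR's SGE limit, so
the remainder has to spill into chained WRs. All constants and names in
it (MAX_SGL_ENTRIES, the 512+8 block format, the 128 KiB transfer) are
illustrative assumptions, not values taken from rdma.c.

/* sketch.c - estimate how many chained WRs a DIF-enabled transfer needs */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SGL_ENTRIES 16	/* stands in for SPDK_NVMF_MAX_SGL_ENTRIES */

int main(void)
{
	uint32_t block_size = 520;		/* assumed: 512 B data + 8 B DIF metadata */
	uint32_t md_size = 8;
	uint32_t data_block_size = block_size - md_size;
	uint32_t io_length = 128 * 1024;	/* assumed logical transfer size */

	/* Each data block is interrupted by its metadata on the wire, so even
	 * a contiguous buffer needs at least one SGE per data block. */
	uint32_t num_blocks = (io_length + data_block_size - 1) / data_block_size;

	/* A single WR holds at most MAX_SGL_ENTRIES SGEs; the remainder spills
	 * into chained WRs, which is what nvmf_rdma_fill_wr_sge now handles. */
	uint32_t num_wrs = (num_blocks + MAX_SGL_ENTRIES - 1) / MAX_SGL_ENTRIES;

	printf("%" PRIu32 " data blocks -> %" PRIu32 " SGEs -> %" PRIu32 " chained WRs\n",
	       num_blocks, num_blocks, num_wrs);	/* 256 blocks -> 256 SGEs -> 16 WRs */
	return 0;
}

This is also why nvmf_rdma_update_remote_addr exists: once the SGEs are
spread over several chained WRs, each WR's wr.rdma.remote_addr must start
where the previous WR's sg_list ended, i.e. at sgl->address plus the sum
of the lengths of all earlier SGEs.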