From 269910c05cb0a2259814ca3beee85564d248b0ea Mon Sep 17 00:00:00 2001
From: Daniel Verkamp
Date: Thu, 27 Jul 2017 18:32:55 -0700
Subject: [PATCH] nvme: refactor PRP building code

This also changes the SGL -> PRP case to translate each 4K page from
virtual to physical, in case the buffer is not physically contiguous.

Change-Id: If027f9d656c52c56504f0c64cd4464e16440df63
Signed-off-by: Daniel Verkamp
Reviewed-on: https://review.gerrithub.io/371616
Tested-by: SPDK Automated Test System
Reviewed-by: Ben Walker
---
 lib/nvme/nvme_pcie.c                          | 196 +++++++++---
 test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c | 142 ++++++++++++-
 2 files changed, 231 insertions(+), 107 deletions(-)

diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c
index 72a249f4f..32f47eda5 100644
--- a/lib/nvme/nvme_pcie.c
+++ b/lib/nvme/nvme_pcie.c
@@ -1518,6 +1518,82 @@ nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tr
 					   1 /* do not retry */, true);
 }
 
+/*
+ * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes.
+ *
+ * *prp_index will be updated to account for the number of PRP entries used.
+ */
+static int
+nvme_pcie_prp_list_append(struct nvme_tracker *tr, uint32_t *prp_index, void *virt_addr, size_t len)
+{
+	struct spdk_nvme_cmd *cmd = &tr->req->cmd;
+	uint64_t phys_addr;
+	uint32_t i;
+
+	SPDK_TRACELOG(SPDK_TRACE_NVME, "prp_index:%u virt_addr:%p len:%u\n",
+		      *prp_index, virt_addr, (uint32_t)len);
+
+	if (spdk_unlikely(((uintptr_t)virt_addr & 3) != 0)) {
+		SPDK_TRACELOG(SPDK_TRACE_NVME, "virt_addr %p not dword aligned\n", virt_addr);
+		return -EINVAL;
+	}
+
+	i = *prp_index;
+	while (len) {
+		uint32_t seg_len;
+
+		/*
+		 * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array,
+		 * so prp_index == count is valid.
+		 */
+		if (spdk_unlikely(i > SPDK_COUNTOF(tr->u.prp))) {
+			SPDK_TRACELOG(SPDK_TRACE_NVME, "out of PRP entries\n");
+			return -EINVAL;
+		}
+
+		phys_addr = spdk_vtophys(virt_addr);
+		if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR)) {
+			SPDK_TRACELOG(SPDK_TRACE_NVME, "vtophys(%p) failed\n", virt_addr);
+			return -EINVAL;
+		}
+
+		if (i == 0) {
+			SPDK_TRACELOG(SPDK_TRACE_NVME, "prp1 = %p\n", (void *)phys_addr);
+			cmd->dptr.prp.prp1 = phys_addr;
+			seg_len = PAGE_SIZE - ((uintptr_t)virt_addr & (PAGE_SIZE - 1));
+		} else {
+			if ((phys_addr & (PAGE_SIZE - 1)) != 0) {
+				SPDK_TRACELOG(SPDK_TRACE_NVME, "PRP %u not page aligned (%p)\n",
+					      i, virt_addr);
+				return -EINVAL;
+			}
+
+			SPDK_TRACELOG(SPDK_TRACE_NVME, "prp[%u] = %p\n", i - 1, (void *)phys_addr);
+			tr->u.prp[i - 1] = phys_addr;
+			seg_len = PAGE_SIZE;
+		}
+
+		seg_len = spdk_min(seg_len, len);
+		virt_addr += seg_len;
+		len -= seg_len;
+		i++;
+	}
+
+	cmd->psdt = SPDK_NVME_PSDT_PRP;
+	if (i <= 1) {
+		cmd->dptr.prp.prp2 = 0;
+	} else if (i == 2) {
+		cmd->dptr.prp.prp2 = tr->u.prp[0];
+		SPDK_TRACELOG(SPDK_TRACE_NVME, "prp2 = %p\n", (void *)cmd->dptr.prp.prp2);
+	} else {
+		cmd->dptr.prp.prp2 = tr->prp_sgl_bus_addr;
+		SPDK_TRACELOG(SPDK_TRACE_NVME, "prp2 = %p (PRP list)\n", (void *)cmd->dptr.prp.prp2);
+	}
+
+	*prp_index = i;
+	return 0;
+}
+
 /**
  * Build PRP list describing physically contiguous payload buffer.
  */
@@ -1525,51 +1601,14 @@ static int
 nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
 				     struct nvme_tracker *tr)
 {
-	uint64_t phys_addr;
-	void *seg_addr;
-	uint32_t nseg, cur_nseg, modulo, unaligned;
-	void *md_payload;
-	void *payload = req->payload.u.contig + req->payload_offset;
+	uint32_t prp_index = 0;
+	int rc;
 
-	phys_addr = spdk_vtophys(payload);
-	if (phys_addr == SPDK_VTOPHYS_ERROR) {
+	rc = nvme_pcie_prp_list_append(tr, &prp_index, req->payload.u.contig + req->payload_offset,
+				       req->payload_size);
+	if (rc) {
 		nvme_pcie_fail_request_bad_vtophys(qpair, tr);
-		return -1;
-	}
-	nseg = req->payload_size >> spdk_u32log2(PAGE_SIZE);
-	modulo = req->payload_size & (PAGE_SIZE - 1);
-	unaligned = phys_addr & (PAGE_SIZE - 1);
-	if (modulo || unaligned) {
-		nseg += 1 + ((modulo + unaligned - 1) >> spdk_u32log2(PAGE_SIZE));
-	}
-
-	if (req->payload.md) {
-		md_payload = req->payload.md + req->md_offset;
-		tr->req->cmd.mptr = spdk_vtophys(md_payload);
-		if (tr->req->cmd.mptr == SPDK_VTOPHYS_ERROR) {
-			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
-			return -1;
-		}
-	}
-
-	tr->req->cmd.psdt = SPDK_NVME_PSDT_PRP;
-	tr->req->cmd.dptr.prp.prp1 = phys_addr;
-	if (nseg == 2) {
-		seg_addr = payload + PAGE_SIZE - unaligned;
-		tr->req->cmd.dptr.prp.prp2 = spdk_vtophys(seg_addr);
-	} else if (nseg > 2) {
-		cur_nseg = 1;
-		tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_sgl_bus_addr;
-		while (cur_nseg < nseg) {
-			seg_addr = payload + cur_nseg * PAGE_SIZE - unaligned;
-			phys_addr = spdk_vtophys(seg_addr);
-			if (phys_addr == SPDK_VTOPHYS_ERROR) {
-				nvme_pcie_fail_request_bad_vtophys(qpair, tr);
-				return -1;
-			}
-			tr->u.prp[cur_nseg - 1] = phys_addr;
-			cur_nseg++;
-		}
+		return rc;
 	}
 
 	return 0;
@@ -1663,11 +1702,8 @@ nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvm
 {
 	int rc;
 	void *virt_addr;
-	uint64_t phys_addr;
-	uint32_t data_transferred, remaining_transfer_len, length;
-	uint32_t nseg, cur_nseg, total_nseg, last_nseg, modulo, unaligned;
-	uint32_t sge_count = 0;
-	uint64_t prp2 = 0;
+	uint32_t remaining_transfer_len, length;
+	uint32_t prp_index = 0;
 
 	/*
 	 * Build scattered payloads.
@@ -1677,9 +1713,6 @@ nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvm
 	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg, req->payload_offset);
 	remaining_transfer_len = req->payload_size;
-	total_nseg = 0;
-	last_nseg = 0;
-
 	while (remaining_transfer_len > 0) {
 		assert(req->payload.u.sgl.next_sge_fn != NULL);
 		rc = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg, &virt_addr, &length);
@@ -1688,66 +1721,23 @@ nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvm
 			return -1;
 		}
 
-		phys_addr = spdk_vtophys(virt_addr);
-		if (phys_addr == SPDK_VTOPHYS_ERROR) {
-			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
-			return -1;
-		}
+		length = spdk_min(remaining_transfer_len, length);
 
 		/*
 		 * Any incompatible sges should have been handled up in the splitting routine,
 		 * but assert here as an additional check.
+		 *
+		 * All SGEs except last must end on a page boundary.
 		 */
-		assert((phys_addr & 0x3) == 0); /* Address must be dword aligned. */
-		/* All SGEs except last must end on a page boundary. */
-		assert((length >= remaining_transfer_len) || _is_page_aligned(phys_addr + length));
-		/* All SGe except first must start on a page boundary. */
-		assert((sge_count == 0) || _is_page_aligned(phys_addr));
+		assert((length == remaining_transfer_len) || _is_page_aligned((uintptr_t)virt_addr + length));
 
-		data_transferred = spdk_min(remaining_transfer_len, length);
-
-		nseg = data_transferred >> spdk_u32log2(PAGE_SIZE);
-		modulo = data_transferred & (PAGE_SIZE - 1);
-		unaligned = phys_addr & (PAGE_SIZE - 1);
-		if (modulo || unaligned) {
-			nseg += 1 + ((modulo + unaligned - 1) >> spdk_u32log2(PAGE_SIZE));
+		rc = nvme_pcie_prp_list_append(tr, &prp_index, virt_addr, length);
+		if (rc) {
+			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
+			return rc;
 		}
 
-		if (total_nseg == 0) {
-			req->cmd.psdt = SPDK_NVME_PSDT_PRP;
-			req->cmd.dptr.prp.prp1 = phys_addr;
-			phys_addr -= unaligned;
-		}
-
-		total_nseg += nseg;
-		sge_count++;
-		remaining_transfer_len -= data_transferred;
-
-		if (total_nseg == 2) {
-			if (sge_count == 1)
-				tr->req->cmd.dptr.prp.prp2 = phys_addr + PAGE_SIZE;
-			else if (sge_count == 2)
-				tr->req->cmd.dptr.prp.prp2 = phys_addr;
-			/* save prp2 value */
-			prp2 = tr->req->cmd.dptr.prp.prp2;
-		} else if (total_nseg > 2) {
-			if (sge_count == 1)
-				cur_nseg = 1;
-			else
-				cur_nseg = 0;
-
-			tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_sgl_bus_addr;
-			while (cur_nseg < nseg) {
-				if (prp2) {
-					tr->u.prp[0] = prp2;
-					tr->u.prp[last_nseg + 1] = phys_addr + cur_nseg * PAGE_SIZE;
-				} else
-					tr->u.prp[last_nseg] = phys_addr + cur_nseg * PAGE_SIZE;
-
-				last_nseg++;
-				cur_nseg++;
-			}
-		}
+		remaining_transfer_len -= length;
 	}
 
 	return 0;
diff --git a/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c b/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c
index 37350f04f..bfda812c0 100644
--- a/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c
+++ b/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c
@@ -642,6 +642,143 @@ static void test_nvme_qpair_destroy(void)
 }
 #endif
 
+static void
+prp_list_prep(struct nvme_tracker *tr, struct nvme_request *req, uint32_t *prp_index)
+{
+	memset(req, 0, sizeof(*req));
+	memset(tr, 0, sizeof(*tr));
+	tr->req = req;
+	tr->prp_sgl_bus_addr = 0xDEADBEEF;
+	*prp_index = 0;
+}
+
+static void
+test_prp_list_append(void)
+{
+	struct nvme_request req;
+	struct nvme_tracker tr;
+	uint32_t prp_index;
+
+	/* Non-DWORD-aligned buffer (invalid) */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100001, 0x1000) == -EINVAL);
+
+	/* 512-byte buffer, 4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000, 0x200) == 0);
+	CU_ASSERT(prp_index == 1);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100000);
+
+	/* 512-byte buffer, non-4K-aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x108000, 0x200) == 0);
+	CU_ASSERT(prp_index == 1);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x108000);
+
+	/* 4K buffer, 4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000, 0x1000) == 0);
+	CU_ASSERT(prp_index == 1);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100000);
+
+	/* 4K buffer, non-4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100800, 0x1000) == 0);
+	CU_ASSERT(prp_index == 2);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100800);
+	CU_ASSERT(req.cmd.dptr.prp.prp2 == 0x101000);
+
+	/* 8K buffer, 4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000, 0x2000) == 0);
+	CU_ASSERT(prp_index == 2);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100000);
+	CU_ASSERT(req.cmd.dptr.prp.prp2 == 0x101000);
+
+	/* 8K buffer, non-4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100800, 0x2000) == 0);
+	CU_ASSERT(prp_index == 3);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100800);
+	CU_ASSERT(req.cmd.dptr.prp.prp2 == tr.prp_sgl_bus_addr);
+	CU_ASSERT(tr.u.prp[0] == 0x101000);
+	CU_ASSERT(tr.u.prp[1] == 0x102000);
+
+	/* 12K buffer, 4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000, 0x3000) == 0);
+	CU_ASSERT(prp_index == 3);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100000);
+	CU_ASSERT(req.cmd.dptr.prp.prp2 == tr.prp_sgl_bus_addr);
+	CU_ASSERT(tr.u.prp[0] == 0x101000);
+	CU_ASSERT(tr.u.prp[1] == 0x102000);
+
+	/* 12K buffer, non-4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100800, 0x3000) == 0);
+	CU_ASSERT(prp_index == 4);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100800);
+	CU_ASSERT(req.cmd.dptr.prp.prp2 == tr.prp_sgl_bus_addr);
+	CU_ASSERT(tr.u.prp[0] == 0x101000);
+	CU_ASSERT(tr.u.prp[1] == 0x102000);
+	CU_ASSERT(tr.u.prp[2] == 0x103000);
+
+	/* Two 4K buffers, both 4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000, 0x1000) == 0);
+	CU_ASSERT(prp_index == 1);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x900000, 0x1000) == 0);
+	CU_ASSERT(prp_index == 2);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100000);
+	CU_ASSERT(req.cmd.dptr.prp.prp2 == 0x900000);
+
+	/* Two 4K buffers, first non-4K aligned, second 4K aligned */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100800, 0x1000) == 0);
+	CU_ASSERT(prp_index == 2);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x900000, 0x1000) == 0);
+	CU_ASSERT(prp_index == 3);
+	CU_ASSERT(req.cmd.dptr.prp.prp1 == 0x100800);
+	CU_ASSERT(req.cmd.dptr.prp.prp2 == tr.prp_sgl_bus_addr);
+	CU_ASSERT(tr.u.prp[0] == 0x101000);
+	CU_ASSERT(tr.u.prp[1] == 0x900000);
+
+	/* Two 4K buffers, both non-4K aligned (invalid) */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100800, 0x1000) == 0);
+	CU_ASSERT(prp_index == 2);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x900800, 0x1000) == -EINVAL);
+	CU_ASSERT(prp_index == 2);
+
+	/* 4K buffer, 4K aligned, but vtophys fails */
+	ut_fail_vtophys = true;
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000, 0x1000) == -EINVAL);
+	ut_fail_vtophys = false;
+
+	/* Largest aligned buffer that can be described in NVME_MAX_PRP_LIST_ENTRIES (plus PRP1) */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000,
+					    (NVME_MAX_PRP_LIST_ENTRIES + 1) * 0x1000) == 0);
+	CU_ASSERT(prp_index == NVME_MAX_PRP_LIST_ENTRIES + 1);
+
+	/* Largest non-4K-aligned buffer that can be described in NVME_MAX_PRP_LIST_ENTRIES (plus PRP1) */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100800,
+					    NVME_MAX_PRP_LIST_ENTRIES * 0x1000) == 0);
+	CU_ASSERT(prp_index == NVME_MAX_PRP_LIST_ENTRIES + 1);
+
+	/* Buffer too large to be described in NVME_MAX_PRP_LIST_ENTRIES */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100000,
+					    (NVME_MAX_PRP_LIST_ENTRIES + 2) * 0x1000) == -EINVAL);
+
+	/* Non-4K-aligned buffer too large to be described in NVME_MAX_PRP_LIST_ENTRIES */
+	prp_list_prep(&tr, &req, &prp_index);
+	CU_ASSERT(nvme_pcie_prp_list_append(&tr, &prp_index, (void *)0x100800,
+					    (NVME_MAX_PRP_LIST_ENTRIES + 1) * 0x1000) == -EINVAL);
+}
+
 int main(int argc, char **argv)
 {
 	CU_pSuite	suite = NULL;
@@ -657,14 +794,11 @@ int main(int argc, char **argv)
 		return CU_get_error();
 	}
 
-#if 0
-	if (CU_add_test(suite, "test3", test3) == NULL
-	    || CU_add_test(suite, "test4", test4) == NULL
+	if (CU_add_test(suite, "prp_list_append", test_prp_list_append) == NULL
 	   ) {
 		CU_cleanup_registry();
 		return CU_get_error();
 	}
-#endif
 
 	CU_basic_set_mode(CU_BRM_VERBOSE);
 	CU_basic_run_tests();
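Appended note, not part of the commit: a minimal standalone sketch of the PRP1/PRP2 selection rule that nvme_pcie_prp_list_append() applies after walking the buffer page by page. The helper name prp_choose and the sample addresses below are made up for illustration; only the three-way rule (one page, exactly two pages, or a PRP list) mirrors the patch.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative helper (not SPDK code): given the physical address of each
 * page a transfer touches, pick PRP1/PRP2 the way the end of
 * nvme_pcie_prp_list_append() does:
 *   - 1 page:   PRP2 is unused (0)
 *   - 2 pages:  PRP2 holds the second page address directly
 *   - 3+ pages: PRP2 holds the bus address of a separate PRP list
 *               (pages[1..count-1] would be written into that list)
 */
static void
prp_choose(const uint64_t *pages, uint32_t count, uint64_t prp_list_bus_addr,
	   uint64_t *prp1, uint64_t *prp2)
{
	*prp1 = pages[0];		/* first page; may start mid-page */

	if (count <= 1) {
		*prp2 = 0;
	} else if (count == 2) {
		*prp2 = pages[1];
	} else {
		*prp2 = prp_list_bus_addr;
	}
}

int
main(void)
{
	/* 8K transfer starting 0x800 into its first page spans three pages,
	 * matching the "8K buffer, non-4K aligned" unit test case above. */
	uint64_t pages[] = { 0x100800, 0x101000, 0x102000 };
	uint64_t prp1, prp2;

	prp_choose(pages, 3, 0xDEADBEEF, &prp1, &prp2);
	printf("prp1=0x%llx prp2=0x%llx\n",
	       (unsigned long long)prp1, (unsigned long long)prp2);
	return 0;
}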