diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
index 67b31e0b0..ba3d5f1d3 100644
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h
@@ -364,6 +364,23 @@ enum nvme_namespace_flags {
  */
 uint32_t nvme_ns_get_flags(struct nvme_namespace *ns);
 
+/**
+ * Restart the SGL walk to the specified offset when the command has scattered payloads.
+ *
+ * The cb_arg parameter is the value passed to readv/writev.
+ */
+typedef void (*nvme_req_reset_sgl_fn_t)(void *cb_arg, uint32_t offset);
+
+/**
+ * Fill out *address and *length with the current SGL entry and advance to the next
+ * entry for the next time the callback is invoked.
+ *
+ * The cb_arg parameter is the value passed to readv/writev.
+ * The address parameter contains the physical address of this segment.
+ * The length parameter contains the length of this physical segment.
+ */
+typedef int (*nvme_req_next_sge_fn_t)(void *cb_arg, uint64_t *address, uint32_t *length);
+
 /**
  * \brief Submits a write I/O to the specified NVMe namespace.
  *
@@ -384,6 +401,29 @@ int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
 		      uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
 		      void *cb_arg);
 
+/**
+ * \brief Submits a write I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the write I/O
+ * \param lba starting LBA to write the data
+ * \param lba_count length (in sectors) for the write operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_writev(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		       nvme_cb_fn_t cb_fn, void *cb_arg,
+		       nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		       nvme_req_next_sge_fn_t next_sge_fn);
+
 /**
  * \brief Submits a read I/O to the specified NVMe namespace.
  *
@@ -404,6 +444,30 @@ int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		     uint32_t lba_count, nvme_cb_fn_t cb_fn,
 		     void *cb_arg);
 
+/**
+ * \brief Submits a read I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the read I/O
+ * \param lba starting LBA to read the data
+ * \param lba_count length (in sectors) for the read operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_readv(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		      nvme_cb_fn_t cb_fn, void *cb_arg,
+		      nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		      nvme_req_next_sge_fn_t next_sge_fn);
+
+
 /**
  * \brief Submits a deallocation request to the specified NVMe namespace.
  *
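For orientation, here is a minimal caller-side sketch of how this callback pair can be implemented over a plain iovec array. Everything in it is illustrative and not part of the patch: the my_* names, and vtophys(), which stands in for whatever virtual-to-physical translation the application uses (the DPDK-based sgl test added at the end of this patch uses rte_malloc_virt2phy()).

#include <stdint.h>
#include <sys/uio.h>

/* Assumed translation helper; not a real driver API. */
extern uint64_t vtophys(void *vaddr);

struct my_sgl {
	struct iovec	*iovs;
	int		niov;
	int		idx;	/* current SGL entry */
	uint32_t	skip;	/* bytes already consumed in iovs[idx] */
};

/* nvme_req_reset_sgl_fn_t: position the walk at payload byte `offset`. */
static void
my_reset_sgl(void *cb_arg, uint32_t offset)
{
	struct my_sgl *sgl = cb_arg;

	for (sgl->idx = 0; sgl->idx < sgl->niov; sgl->idx++) {
		if (offset < sgl->iovs[sgl->idx].iov_len)
			break;
		offset -= sgl->iovs[sgl->idx].iov_len;
	}
	sgl->skip = offset;
}

/* nvme_req_next_sge_fn_t: report the current entry, then advance. */
static int
my_next_sge(void *cb_arg, uint64_t *address, uint32_t *length)
{
	struct my_sgl *sgl = cb_arg;
	struct iovec *iov;

	if (sgl->idx >= sgl->niov)
		return -1;	/* a non-zero return fails the request */

	iov = &sgl->iovs[sgl->idx];
	*address = vtophys((uint8_t *)iov->iov_base + sgl->skip);
	*length = iov->iov_len - sgl->skip;
	sgl->skip = 0;
	sgl->idx++;
	return 0;
}

A scattered write is then nvme_ns_cmd_writev(ns, lba, lba_count, cb_fn, &sgl, my_reset_sgl, my_next_sge); the single cb_arg is handed both to the completion callback and to these two SGL callbacks.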
diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c
index 4ca915cbe..06c15c7cb 100644
--- a/lib/nvme/nvme.c
+++ b/lib/nvme/nvme.c
@@ -145,16 +145,11 @@ nvme_allocate_request(void *payload, uint32_t payload_size,
 	req->cb_fn = cb_fn;
 	req->cb_arg = cb_arg;
 	req->timeout = true;
-	nvme_assert((payload == NULL && payload_size == 0) ||
-		    (payload != NULL && payload_size != 0),
-		    ("Invalid argument combination of payload and payload_size\n"));
-	if (payload == NULL || payload_size == 0) {
-		req->u.payload = NULL;
-		req->payload_size = 0;
-	} else {
-		req->u.payload = payload;
-		req->payload_size = payload_size;
-	}
+	req->sgl_offset = 0;
+	req->parent = NULL;
+
+	req->u.payload = payload;
+	req->payload_size = payload_size;
 
 	return req;
 }
diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h
index 6e15dd197..91081ef22 100644
--- a/lib/nvme/nvme_internal.h
+++ b/lib/nvme/nvme_internal.h
@@ -159,6 +159,13 @@ struct nvme_request {
 	 * status once all child requests are completed.
 	 */
 	struct nvme_completion parent_status;
+
+	/**
+	 * Functions for retrieving physical addresses for scattered payloads.
+	 */
+	nvme_req_reset_sgl_fn_t reset_sgl_fn;
+	nvme_req_next_sge_fn_t next_sge_fn;
+	uint32_t sgl_offset;
 };
 
 struct nvme_completion_poll_status {
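The sgl_offset field added above exists for request splitting: when an oversized or stripe-crossing SGL I/O is split (see nvme_ns_cmd.c below), each child request must restart the caller's iterator at its own starting byte. A hedged arithmetic sketch of that bookkeeping for the simple equal-size split with no stripe mask; the helper and values are examples, not code from the patch:

#include <assert.h>
#include <stdint.h>

/* Byte offset of split child k into the caller's scattered payload,
 * mirroring the running `offset` kept by _nvme_ns_cmd_split_request(). */
static uint32_t
child_sgl_offset(uint32_t k, uint32_t sectors_per_max_io, uint32_t sector_size)
{
	return k * sectors_per_max_io * sector_size;
}

int
main(void)
{
	/* A 1MB I/O on a 512B-sector namespace with a 128KB max transfer
	 * size splits into 8 children; child 3 starts 384KB in. */
	assert(child_sgl_offset(3, 256, 512) == 3 * 128 * 1024);
	return 0;
}

The qpair code later passes each child's sgl_offset to reset_sgl_fn() before walking its SGEs, so the caller's iterator never needs to know the request was split.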
diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c
index 069e2880c..3e3ec4749 100644
--- a/lib/nvme/nvme_ns_cmd.c
+++ b/lib/nvme/nvme_ns_cmd.c
@@ -41,7 +41,8 @@
 static struct nvme_request *
 _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg,
-		uint32_t opc);
+		uint32_t opc, nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		nvme_req_next_sge_fn_t next_sge_fn);
 
 static void
 nvme_cb_complete_child(void *child_arg, const struct nvme_completion *cpl)
@@ -91,10 +92,13 @@ _nvme_ns_cmd_split_request(struct nvme_namespace *ns, void *payload,
 			   uint64_t lba, uint32_t lba_count,
 			   nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t opc,
 			   struct nvme_request *req,
-			   uint32_t sectors_per_max_io, uint32_t sector_mask)
+			   uint32_t sectors_per_max_io, uint32_t sector_mask,
+			   nvme_req_reset_sgl_fn_t reset_sgl_fn,
+			   nvme_req_next_sge_fn_t next_sge_fn)
 {
 	uint32_t sector_size = ns->sector_size;
 	uint32_t remaining_lba_count = lba_count;
+	uint32_t offset = 0;
 	struct nvme_request *child;
 
 	while (remaining_lba_count > 0) {
@@ -102,7 +106,7 @@ _nvme_ns_cmd_split_request(struct nvme_namespace *ns, void *payload,
 		lba_count = nvme_min(remaining_lba_count, lba_count);
 
 		child = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn,
-					cb_arg, opc);
+					cb_arg, opc, reset_sgl_fn, next_sge_fn);
 		if (child == NULL) {
 			nvme_free_request(req);
 			return NULL;
@@ -110,7 +114,11 @@ _nvme_ns_cmd_split_request(struct nvme_namespace *ns, void *payload,
 		nvme_request_add_child(req, child);
 		remaining_lba_count -= lba_count;
 		lba += lba_count;
-		payload = (void *)((uintptr_t)payload + (lba_count * sector_size));
+		if (req->u.payload == NULL) {
+			child->sgl_offset = offset;
+			offset += lba_count * ns->sector_size;
+		} else
+			payload = (void *)((uintptr_t)payload + (lba_count * sector_size));
 	}
 
 	return req;
@@ -119,7 +127,8 @@
 static struct nvme_request *
 _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg,
-		uint32_t opc)
+		uint32_t opc, nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		nvme_req_next_sge_fn_t next_sge_fn)
 {
 	struct nvme_request	*req;
 	struct nvme_command	*cmd;
@@ -137,6 +146,9 @@ _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		return NULL;
 	}
 
+	req->reset_sgl_fn = reset_sgl_fn;
+	req->next_sge_fn = next_sge_fn;
+
 	/*
 	 * Intel DC P3*00 NVMe controllers benefit from driver-assisted striping.
 	 * If this controller defines a stripe boundary and this I/O spans a stripe
@@ -147,10 +159,12 @@ _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 	if (sectors_per_stripe > 0 &&
 	    (((lba & (sectors_per_stripe - 1)) + lba_count) > sectors_per_stripe)) {
 		return _nvme_ns_cmd_split_request(ns, payload, lba, lba_count, cb_fn, cb_arg, opc,
-						  req, sectors_per_stripe, sectors_per_stripe - 1);
+						  req, sectors_per_stripe, sectors_per_stripe - 1,
+						  reset_sgl_fn, next_sge_fn);
 	} else if (lba_count > sectors_per_max_io) {
 		return _nvme_ns_cmd_split_request(ns, payload, lba, lba_count, cb_fn, cb_arg, opc,
-						  req, sectors_per_max_io, 0);
+						  req, sectors_per_max_io, 0,
+						  reset_sgl_fn, next_sge_fn);
 	} else {
 		cmd = &req->cmd;
 		cmd->opc = opc;
@@ -170,7 +184,25 @@ nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload, uint64_t lba,
 {
 	struct nvme_request *req;
 
-	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_READ);
+	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_READ, NULL, NULL);
+	if (req != NULL) {
+		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
+		return 0;
+	} else {
+		return ENOMEM;
+	}
+}
+
+int
+nvme_ns_cmd_readv(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		  nvme_cb_fn_t cb_fn, void *cb_arg,
+		  nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		  nvme_req_next_sge_fn_t next_sge_fn)
+{
+	struct nvme_request *req;
+
+	req = _nvme_ns_cmd_rw(ns, NULL, lba, lba_count, cb_fn, cb_arg, NVME_OPC_READ, reset_sgl_fn,
+			      next_sge_fn);
 	if (req != NULL) {
 		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 		return 0;
@@ -185,7 +217,25 @@ nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload, uint64_t lba,
 {
 	struct nvme_request *req;
 
-	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_WRITE);
+	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_WRITE, NULL, NULL);
+	if (req != NULL) {
+		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
+		return 0;
+	} else {
+		return ENOMEM;
+	}
+}
+
+int
+nvme_ns_cmd_writev(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		   nvme_cb_fn_t cb_fn, void *cb_arg,
+		   nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		   nvme_req_next_sge_fn_t next_sge_fn)
+{
+	struct nvme_request *req;
+
+	req = _nvme_ns_cmd_rw(ns, NULL, lba, lba_count, cb_fn, cb_arg, NVME_OPC_WRITE, reset_sgl_fn,
+			      next_sge_fn);
 	if (req != NULL) {
 		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 		return 0;
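A concrete case makes the page-count arithmetic in _nvme_qpair_build_sgl_request() (next file) easier to check. This standalone sketch mirrors that computation under the driver's 4KB PAGE_SIZE; the helper name and test values are mine, not the patch's:

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 0x1000u

/* Number of 4KB pages an SGE spans, mirroring the driver's
 * nseg/modulo/unaligned computation. */
static uint32_t
sge_page_count(uint64_t phys_addr, uint32_t len)
{
	uint32_t nseg = len / PAGE_SIZE;
	uint32_t modulo = len & (PAGE_SIZE - 1);
	uint32_t unaligned = phys_addr & (PAGE_SIZE - 1);

	if (modulo || unaligned)
		nseg += 1 + ((modulo + unaligned - 1) / PAGE_SIZE);
	return nseg;
}

int
main(void)
{
	/* 12KB starting 0x200 into a page touches 4 pages... */
	assert(sge_page_count(0x100200, 0x3000) == 4);
	/* ...while the same 12KB page-aligned touches exactly 3. */
	assert(sge_page_count(0x100000, 0x3000) == 3);
	return 0;
}

The unaligned-start case is also why the function rejects a request whose interior SGE is not page-aligned: every PRP list entry after the first must be a page-aligned address.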
diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c
index 6f23d91a2..dea80621c 100644
--- a/lib/nvme/nvme_qpair.c
+++ b/lib/nvme/nvme_qpair.c
@@ -668,9 +668,94 @@ _nvme_fail_request_ctrlr_failed(struct nvme_qpair *qpair, struct nvme_request *r
 				NVME_SC_ABORTED_BY_REQUEST, true);
 }
 
+static int
+_nvme_qpair_build_sgl_request(struct nvme_qpair *qpair, struct nvme_request *req,
+			      struct nvme_tracker *tr)
+{
+	int rc;
+	uint64_t phys_addr;
+	uint32_t data_transferred, remaining_transfer_len, length;
+	uint32_t nseg, cur_nseg, total_nseg, last_nseg, modulo, unaligned;
+	uint32_t sge_count = 0;
+	uint64_t prp2 = 0;
+	struct nvme_request *parent;
+
+	/*
+	 * Build scattered payloads.
+	 */
+
+	parent = req->parent ? req->parent : req;
+	nvme_assert(req->reset_sgl_fn != NULL, ("sgl reset callback required\n"));
+	req->reset_sgl_fn(parent->cb_arg, req->sgl_offset);
+
+	remaining_transfer_len = req->payload_size;
+	total_nseg = 0;
+	last_nseg = 0;
+
+	while (remaining_transfer_len > 0) {
+		nvme_assert(req->next_sge_fn != NULL, ("sgl callback required\n"));
+		rc = req->next_sge_fn(parent->cb_arg, &phys_addr, &length);
+		if (rc)
+			return -1;
+
+		data_transferred = nvme_min(remaining_transfer_len, length);
+
+		nseg = data_transferred >> nvme_u32log2(PAGE_SIZE);
+		modulo = data_transferred & (PAGE_SIZE - 1);
+		unaligned = phys_addr & (PAGE_SIZE - 1);
+		if (modulo || unaligned) {
+			nseg += 1 + ((modulo + unaligned - 1) >> nvme_u32log2(PAGE_SIZE));
+		}
+
+		if (total_nseg == 0) {
+			req->cmd.psdt = NVME_PSDT_PRP;
+			req->cmd.dptr.prp.prp1 = phys_addr;
+		}
+
+		total_nseg += nseg;
+		sge_count++;
+		remaining_transfer_len -= data_transferred;
+
+		if (total_nseg == 2) {
+			if (sge_count == 1)
+				tr->req->cmd.dptr.prp.prp2 = phys_addr + PAGE_SIZE - unaligned;
+			else if (sge_count == 2)
+				tr->req->cmd.dptr.prp.prp2 = phys_addr;
+			/* save prp2 value */
+			prp2 = tr->req->cmd.dptr.prp.prp2;
+		} else if (total_nseg > 2) {
+			if (sge_count == 1)
+				cur_nseg = 1;
+			else
+				cur_nseg = 0;
+
+			tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_bus_addr;
+			while (cur_nseg < nseg) {
+				if (prp2) {
+					tr->prp[0] = prp2;
+					tr->prp[last_nseg + 1] = phys_addr + cur_nseg * PAGE_SIZE - unaligned;
+				} else
+					tr->prp[last_nseg] = phys_addr + cur_nseg * PAGE_SIZE - unaligned;
+
+				last_nseg++;
+				cur_nseg++;
+
+				/* physical address and length check */
+				if (remaining_transfer_len || (!remaining_transfer_len && (cur_nseg < nseg))) {
+					if ((length & (PAGE_SIZE - 1)) || unaligned)
+						return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
 void
 nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 {
+	int rc;
 	struct nvme_tracker	*tr;
 	struct nvme_request	*child_req;
 	uint64_t phys_addr;
@@ -718,7 +803,7 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 	tr->req = req;
 	req->cmd.cid = tr->cid;
 
-	if (req->payload_size) {
+	if (req->u.payload) {
 		/*
 		 * Build PRP list describing payload buffer.
 		 */
@@ -754,6 +839,12 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 				cur_nseg++;
 			}
 		}
+	} else if (req->u.payload == NULL && req->payload_size != 0) {
+		rc = _nvme_qpair_build_sgl_request(qpair, req, tr);
+		if (rc < 0) {
+			_nvme_fail_request_bad_vtophys(qpair, tr);
+			return;
+		}
 	}
 
 	nvme_qpair_submit_tracker(qpair, tr);
diff --git a/test/lib/nvme/Makefile b/test/lib/nvme/Makefile
index c0bcd4f34..beabcca6e 100644
--- a/test/lib/nvme/Makefile
+++ b/test/lib/nvme/Makefile
@@ -34,7 +34,7 @@
 SPDK_ROOT_DIR := $(CURDIR)/../../..
 include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
 
-DIRS-y = unit aer reset
+DIRS-y = unit aer reset sgl
 
 .PHONY: all clean $(DIRS-y)
 
diff --git a/test/lib/nvme/nvme.sh b/test/lib/nvme/nvme.sh
index 15ff7bf74..e8557e78a 100755
--- a/test/lib/nvme/nvme.sh
+++ b/test/lib/nvme/nvme.sh
@@ -33,5 +33,8 @@
 timing_enter reset
 $testdir/reset/reset -q 64 -w write -s 4096 -t 15
 timing_exit reset
+timing_enter sgl
+$testdir/sgl/sgl
+timing_exit sgl
 
 timing_exit nvme
diff --git a/test/lib/nvme/sgl/Makefile b/test/lib/nvme/sgl/Makefile
new file mode 100644
index 000000000..c196481d6
--- /dev/null
+++ b/test/lib/nvme/sgl/Makefile
@@ -0,0 +1,57 @@
+#
+#  BSD LICENSE
+#
+#  Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(CURDIR)/../../../..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+APP = sgl
+
+C_SRCS := nvme_sgl.c
+
+CFLAGS += -I. $(DPDK_INC)
+
+SPDK_LIBS += $(SPDK_ROOT_DIR)/lib/nvme/libspdk_nvme.a \
+	     $(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \
+	     $(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a
+
+LIBS += $(SPDK_LIBS) -lpciaccess -lpthread $(DPDK_LIB) -lrt
+
+all : $(APP)
+
+$(APP) : $(OBJS) $(SPDK_LIBS)
+	$(LINK_C)
+
+clean :
+	$(CLEAN_C) $(APP)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/test/lib/nvme/sgl/nvme_sgl.c b/test/lib/nvme/sgl/nvme_sgl.c
new file mode 100644
index 000000000..67d728a8c
--- /dev/null
+++ b/test/lib/nvme/sgl/nvme_sgl.c
@@ -0,0 +1,478 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/uio.h> /* for struct iovec */
+
+#include <pciaccess.h>
+
+#include <rte_config.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+
+#include "spdk/nvme.h"
+#include "spdk/pci.h"
+
+struct rte_mempool *request_mempool;
+
+#define MAX_DEVS 64
+
+#define MAX_IOVS 128
+
+#define DATA_PATTERN 0x5A
+
+#define BASE_LBA_START 0x100000
+
+struct dev {
+	struct pci_device	*pci_dev;
+	struct nvme_controller	*ctrlr;
+	char			name[100];
+};
+
+static struct dev devs[MAX_DEVS];
+static int num_devs = 0;
+
+#define foreach_dev(iter) \
+	for (iter = devs; iter - devs < num_devs; iter++)
+
+static int io_complete_flag = 0;
+
+struct io_request {
+	int current_iov_index;
+	uint32_t current_iov_bytes_left;
+	struct iovec iovs[MAX_IOVS];
+	int nseg;
+};
+
+static void nvme_request_reset_sgl(void *cb_arg, uint32_t sgl_offset)
+{
+	int i;
+	uint32_t offset = 0;
+	struct iovec *iov;
+	struct io_request *req = (struct io_request *)cb_arg;
+
+	/* Find the iovec containing payload byte sgl_offset and remember
+	 * how many bytes of it are still left to transfer. */
+	for (i = 0; i < req->nseg; i++) {
+		iov = &req->iovs[i];
+		offset += iov->iov_len;
+		if (offset > sgl_offset)
+			break;
+	}
+	req->current_iov_index = i;
+	req->current_iov_bytes_left = offset - sgl_offset;
+	return;
+}
+
+static int nvme_request_next_sge(void *cb_arg, uint64_t *address, uint32_t *length)
+{
+	struct io_request *req = (struct io_request *)cb_arg;
+	struct iovec *iov;
+
+	if (req->current_iov_index >= req->nseg) {
+		*length = 0;
+		*address = 0;
+		return 0;
+	}
+
+	iov = &req->iovs[req->current_iov_index];
+
+	if (req->current_iov_bytes_left) {
+		/* Resuming partway into this iovec after a reset. */
+		*address = rte_malloc_virt2phy(iov->iov_base) + iov->iov_len - req->current_iov_bytes_left;
+		*length = req->current_iov_bytes_left;
+		req->current_iov_bytes_left = 0;
+	} else {
+		*address = rte_malloc_virt2phy(iov->iov_base);
+		*length = iov->iov_len;
+	}
+
+	req->current_iov_index++;
+
+	return 0;
+}
+
+static void
+io_complete(void *ctx, const struct nvme_completion *cpl)
+{
+	if (nvme_completion_is_error(cpl))
+		io_complete_flag = 2;
+	else
+		io_complete_flag = 1;
+}
+
+static uint32_t build_io_request_1(struct io_request *req)
+{
+	int i, found = 0;
+	uint8_t *buf;
+	uint64_t v_addr;
+	uint32_t len = 0;
+
+	req->nseg = 3;
+
+	/* 2KB for 1st sge; make sure the iov address starts at a 0x800
+	 * boundary and ends on a 0x1000 boundary */
+	for (i = 0; i < 8; i++) {
+		buf = rte_zmalloc(NULL, 0x800, 0x800);
+		v_addr = (uint64_t)buf;
+		if (v_addr & 0x800ULL) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return 0;
+	req->iovs[0].iov_base = buf;
+	req->iovs[0].iov_len = 0x800;
+
+	/* 4KB for 2nd sge */
+	req->iovs[1].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[1].iov_len = 0x1000;
+
+	/* 12KB for 3rd sge */
+	req->iovs[2].iov_base = rte_zmalloc(NULL, 0x3000, 0x1000);
+	req->iovs[2].iov_len = 0x3000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_2(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 32;
+
+	/* 4KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[0].iov_len = 0x1000;
+
+	/* 8KB for each of the remaining 31 sges */
+	for (i = 1; i < req->nseg; i++) {
+		req->iovs[i].iov_base = rte_zmalloc(NULL, 0x2000, 0x1000);
+		req->iovs[i].iov_len = 0x2000;
+	}
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_3(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 1;
+
+	/* 8KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x2000, 0x1000);
+	req->iovs[0].iov_len = 0x2000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_4(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 2;
+
+	/* 4KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[0].iov_len = 0x1000;
+
+	/* 4KB for 2nd sge */
+	req->iovs[1].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[1].iov_len = 0x1000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_5(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 1;
+
+	/* 256KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x40000, 0x1000);
+	req->iovs[0].iov_len = 0x40000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_6(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 1;
+
+	/* 512B for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x200, 0x200);
+	req->iovs[0].iov_len = 0x200;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+typedef uint32_t (*nvme_build_io_req_fn_t)(struct io_request *req);
+
+static int
+writev_readv_tests(struct dev *dev, nvme_build_io_req_fn_t build_io_fn)
+{
+	int rc = 0;
+	uint32_t len, lba_count;
+	uint32_t i, j, nseg;
+	char *buf;
+
+	struct io_request *req;
+	struct nvme_namespace *ns;
+	const struct nvme_namespace_data *nsdata;
+
+	ns = nvme_ctrlr_get_ns(dev->ctrlr, 1);
+	if (!ns)
+		return -1;
+	nsdata = nvme_ns_get_data(ns);
+	if (!nsdata || !nvme_ns_get_sector_size(ns))
+		return -1;
+
+	req = rte_zmalloc(NULL, sizeof(*req), 0);
+	if (!req)
+		return -1;
+
+	/* IO parameters setting */
+	len = build_io_fn(req);
+	if (!len) {
+		rte_free(req);
+		return 0;
+	}
+
+	lba_count = len / nvme_ns_get_sector_size(ns);
+	if (BASE_LBA_START + lba_count > (uint32_t)nsdata->nsze) {
+		rte_free(req);
+		return -1;
+	}
+
+	nseg = req->nseg;
+	for (i = 0; i < nseg; i++) {
+		memset(req->iovs[i].iov_base, DATA_PATTERN, req->iovs[i].iov_len);
+	}
+
+	rc = nvme_ns_cmd_writev(ns, BASE_LBA_START, lba_count,
+				io_complete, req,
+				nvme_request_reset_sgl,
+				nvme_request_next_sge);
+
+	if (rc != 0) {
+		fprintf(stderr, "Writev Failed\n");
+		rte_free(req);
+		return -1;
+	}
+
+	io_complete_flag = 0;
+
+	while (!io_complete_flag)
+		nvme_ctrlr_process_io_completions(dev->ctrlr, 1);
+
+	if (io_complete_flag != 1) {
+		fprintf(stderr, "%s Writev Failed\n", dev->name);
+		rte_free(req);
+		return -1;
+	}
+
+	/* reset completion flag */
+	io_complete_flag = 0;
+
+	for (i = 0; i < nseg; i++) {
+		memset(req->iovs[i].iov_base, 0, req->iovs[i].iov_len);
+	}
+
+	rc = nvme_ns_cmd_readv(ns, BASE_LBA_START, lba_count,
+			       io_complete, req,
+			       nvme_request_reset_sgl,
+			       nvme_request_next_sge);
+
+	if (rc != 0) {
+		fprintf(stderr, "Readv Failed\n");
+		rte_free(req);
+		return -1;
+	}
+
+	while (!io_complete_flag)
+		nvme_ctrlr_process_io_completions(dev->ctrlr, 1);
+
+	if (io_complete_flag != 1) {
+		fprintf(stderr, "%s Readv Failed\n", dev->name);
+		rte_free(req);
+		return -1;
+	}
+
+	for (i = 0; i < nseg; i++) {
+		buf = (char *)req->iovs[i].iov_base;
+		for (j = 0; j < req->iovs[i].iov_len; j++) {
+			if (buf[j] != DATA_PATTERN) {
+				fprintf(stderr, "Write/Read Success, but %s Memcmp Failed\n", dev->name);
+				rte_free(req);
+				return -1;
+			}
+		}
+	}
+
+	fprintf(stdout, "%s %s Test Passed\n", dev->name, __func__);
+	rte_free(req);
+	return rc;
+}
+
+static const char *ealargs[] = {
+	"nvme_sgl",
+	"-c 0x1",
+	"-n 4",
+};
+
+int main(int argc, char **argv)
+{
+	struct pci_device_iterator	*pci_dev_iter;
+	struct pci_device		*pci_dev;
+	struct dev			*iter;
+	struct pci_id_match		match;
+	int				rc, i;
+
+	printf("NVMe Readv/Writev Request test\n");
+
+	rc = rte_eal_init(sizeof(ealargs) / sizeof(ealargs[0]),
+			  (char **)(void *)(uintptr_t)ealargs);
+
+	if (rc < 0) {
+		fprintf(stderr, "could not initialize dpdk\n");
+		exit(1);
+	}
+
+	request_mempool = rte_mempool_create("nvme_request", 8192,
+					     nvme_request_size(), 128, 0,
+					     NULL, NULL, NULL, NULL,
+					     SOCKET_ID_ANY, 0);
+
+	if (request_mempool == NULL) {
+		fprintf(stderr, "could not initialize request mempool\n");
+		exit(1);
+	}
+
+	pci_system_init();
+
+	match.vendor_id = PCI_MATCH_ANY;
+	match.subvendor_id = PCI_MATCH_ANY;
+	match.subdevice_id = PCI_MATCH_ANY;
+	match.device_id = PCI_MATCH_ANY;
+	match.device_class = NVME_CLASS_CODE;
+	match.device_class_mask = 0xFFFFFF;
+
+	pci_dev_iter = pci_id_match_iterator_create(&match);
+
+	rc = 0;
+	while ((pci_dev = pci_device_next(pci_dev_iter))) {
+		struct dev *dev;
+
+		if (pci_device_has_non_uio_driver(pci_dev)) {
+			fprintf(stderr, "non-null kernel driver attached to nvme\n");
+			fprintf(stderr, " controller at pci bdf %d:%d:%d\n",
+				pci_dev->bus, pci_dev->dev, pci_dev->func);
+			fprintf(stderr, " skipping...\n");
+			continue;
+		}
+
+		pci_device_probe(pci_dev);
+
+		/* add to dev list */
+		dev = &devs[num_devs++];
+
+		dev->pci_dev = pci_dev;
+
+		snprintf(dev->name, sizeof(dev->name), "%04X:%02X:%02X.%02X",
+			 pci_dev->domain, pci_dev->bus, pci_dev->dev, pci_dev->func);
+
+		printf("%s: attaching NVMe driver...\n", dev->name);
+
+		dev->ctrlr = nvme_attach(pci_dev);
+		if (dev->ctrlr == NULL) {
+			fprintf(stderr, "failed to attach to NVMe controller %s\n", dev->name);
+			rc = 1;
+			continue; /* TODO: just abort */
+		}
+	}
+	pci_iterator_destroy(pci_dev_iter);
+
+	if (num_devs) {
+		rc = nvme_register_io_thread();
+		if (rc != 0)
+			return rc;
+	}
+
+	foreach_dev(iter) {
+		if (writev_readv_tests(iter, build_io_request_1)
+		    || writev_readv_tests(iter, build_io_request_2)
+		    || writev_readv_tests(iter, build_io_request_3)
+		    || writev_readv_tests(iter, build_io_request_4)
+		    || writev_readv_tests(iter, build_io_request_5)
+		    || writev_readv_tests(iter, build_io_request_6)) {
+			printf("%s: failed sgl tests\n", iter->name);
+		}
+	}
+
+	printf("Cleaning up...\n");
+
+	for (i = 0; i < num_devs; i++) {
+		struct dev *dev = &devs[i];
+
+		nvme_detach(dev->ctrlr);
+	}
+
+	if (num_devs)
+		nvme_unregister_io_thread();
+
+	return rc;
+}