diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
index 67b31e0b0..ba3d5f1d3 100644
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h
@@ -364,6 +364,23 @@ enum nvme_namespace_flags {
  */
 uint32_t nvme_ns_get_flags(struct nvme_namespace *ns);
 
+/**
+ * Restart the SGL walk to the specified offset when the command has scattered payloads.
+ *
+ * The cb_arg parameter is the value passed to readv/writev.
+ */
+typedef void (*nvme_req_reset_sgl_fn_t)(void *cb_arg, uint32_t offset);
+
+/**
+ * Fill out *address and *length with the current SGL entry and advance to the next
+ * entry for the next time the callback is invoked.
+ *
+ * The cb_arg parameter is the value passed to readv/writev.
+ * The address parameter contains the physical address of this segment.
+ * The length parameter contains the length of this physical segment.
+ */
+typedef int (*nvme_req_next_sge_fn_t)(void *cb_arg, uint64_t *address, uint32_t *length);
+
 /**
  * \brief Submits a write I/O to the specified NVMe namespace.
  *
@@ -384,6 +401,29 @@ int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
 		      uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
 		      void *cb_arg);
 
+/**
+ * \brief Submits a write I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the write I/O
+ * \param lba starting LBA to write the data
+ * \param lba_count length (in sectors) for the write operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_writev(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		       nvme_cb_fn_t cb_fn, void *cb_arg,
+		       nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		       nvme_req_next_sge_fn_t next_sge_fn);
+
 /**
  * \brief Submits a read I/O to the specified NVMe namespace.
  *
@@ -404,6 +444,30 @@ int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		     uint32_t lba_count, nvme_cb_fn_t cb_fn,
 		     void *cb_arg);
 
+/**
+ * \brief Submits a read I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the read I/O
+ * \param lba starting LBA to read the data
+ * \param lba_count length (in sectors) for the read operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_readv(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		      nvme_cb_fn_t cb_fn, void *cb_arg,
+		      nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		      nvme_req_next_sge_fn_t next_sge_fn);
+
+
 /**
  * \brief Submits a deallocation request to the specified NVMe namespace.
  *
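For orientation, here is a minimal caller-side sketch of how this callback pair can be implemented over a plain iovec array. Everything in it is illustrative and not part of the patch: the my_* names, and vtophys(), which stands in for whatever virtual-to-physical translation the application uses (the DPDK-based sgl test added at the end of this patch uses rte_malloc_virt2phy()).

#include <stdint.h>
#include <sys/uio.h>

/* Assumed translation helper; not a real driver API. */
extern uint64_t vtophys(void *vaddr);

struct my_sgl {
	struct iovec	*iovs;
	int		niov;
	int		idx;	/* current SGL entry */
	uint32_t	skip;	/* bytes already consumed in iovs[idx] */
};

/* nvme_req_reset_sgl_fn_t: position the walk at payload byte `offset`. */
static void
my_reset_sgl(void *cb_arg, uint32_t offset)
{
	struct my_sgl *sgl = cb_arg;

	for (sgl->idx = 0; sgl->idx < sgl->niov; sgl->idx++) {
		if (offset < sgl->iovs[sgl->idx].iov_len)
			break;
		offset -= sgl->iovs[sgl->idx].iov_len;
	}
	sgl->skip = offset;
}

/* nvme_req_next_sge_fn_t: report the current entry, then advance. */
static int
my_next_sge(void *cb_arg, uint64_t *address, uint32_t *length)
{
	struct my_sgl *sgl = cb_arg;
	struct iovec *iov;

	if (sgl->idx >= sgl->niov)
		return -1;	/* a non-zero return fails the request */

	iov = &sgl->iovs[sgl->idx];
	*address = vtophys((uint8_t *)iov->iov_base + sgl->skip);
	*length = iov->iov_len - sgl->skip;
	sgl->skip = 0;
	sgl->idx++;
	return 0;
}

A scattered write is then nvme_ns_cmd_writev(ns, lba, lba_count, cb_fn, &sgl, my_reset_sgl, my_next_sge); the single cb_arg is handed both to the completion callback and to these two SGL callbacks.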
diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c
index 4ca915cbe..06c15c7cb 100644
--- a/lib/nvme/nvme.c
+++ b/lib/nvme/nvme.c
@@ -145,16 +145,11 @@ nvme_allocate_request(void *payload, uint32_t payload_size,
 	req->cb_fn = cb_fn;
 	req->cb_arg = cb_arg;
 	req->timeout = true;
-	nvme_assert((payload == NULL && payload_size == 0) ||
-		    (payload != NULL && payload_size != 0),
-		    ("Invalid argument combination of payload and payload_size\n"));
-	if (payload == NULL || payload_size == 0) {
-		req->u.payload = NULL;
-		req->payload_size = 0;
-	} else {
-		req->u.payload = payload;
-		req->payload_size = payload_size;
-	}
+	req->sgl_offset = 0;
+	req->parent = NULL;
+
+	req->u.payload = payload;
+	req->payload_size = payload_size;
 
 	return req;
 }
diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h
index 6e15dd197..91081ef22 100644
--- a/lib/nvme/nvme_internal.h
+++ b/lib/nvme/nvme_internal.h
@@ -159,6 +159,13 @@ struct nvme_request {
 	 * status once all child requests are completed.
 	 */
 	struct nvme_completion parent_status;
+
+	/**
+	 * Functions for retrieving physical addresses for scattered payloads.
+	 */
+	nvme_req_reset_sgl_fn_t reset_sgl_fn;
+	nvme_req_next_sge_fn_t next_sge_fn;
+	uint32_t sgl_offset;
 };
 
 struct nvme_completion_poll_status {
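The sgl_offset field added above exists for request splitting: when an oversized or stripe-crossing SGL I/O is split (see nvme_ns_cmd.c below), each child request must restart the caller's iterator at its own starting byte. A hedged arithmetic sketch of that bookkeeping for the simple equal-size split with no stripe mask; the helper and values are examples, not code from the patch:

#include <assert.h>
#include <stdint.h>

/* Byte offset of split child k into the caller's scattered payload,
 * mirroring the running `offset` kept by _nvme_ns_cmd_split_request(). */
static uint32_t
child_sgl_offset(uint32_t k, uint32_t sectors_per_max_io, uint32_t sector_size)
{
	return k * sectors_per_max_io * sector_size;
}

int
main(void)
{
	/* A 1MB I/O on a 512B-sector namespace with a 128KB max transfer
	 * size splits into 8 children; child 3 starts 384KB in. */
	assert(child_sgl_offset(3, 256, 512) == 3 * 128 * 1024);
	return 0;
}

The qpair code later passes each child's sgl_offset to reset_sgl_fn() before walking its SGEs, so the caller's iterator never needs to know the request was split.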
diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c
index 069e2880c..3e3ec4749 100644
--- a/lib/nvme/nvme_ns_cmd.c
+++ b/lib/nvme/nvme_ns_cmd.c
@@ -41,7 +41,8 @@
 static struct nvme_request *
 _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg,
-		uint32_t opc);
+		uint32_t opc, nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		nvme_req_next_sge_fn_t next_sge_fn);
 
 static void
 nvme_cb_complete_child(void *child_arg, const struct nvme_completion *cpl)
@@ -91,10 +92,13 @@ _nvme_ns_cmd_split_request(struct nvme_namespace *ns, void *payload,
 			   uint64_t lba, uint32_t lba_count,
 			   nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t opc,
 			   struct nvme_request *req,
-			   uint32_t sectors_per_max_io, uint32_t sector_mask)
+			   uint32_t sectors_per_max_io, uint32_t sector_mask,
+			   nvme_req_reset_sgl_fn_t reset_sgl_fn,
+			   nvme_req_next_sge_fn_t next_sge_fn)
 {
 	uint32_t sector_size = ns->sector_size;
 	uint32_t remaining_lba_count = lba_count;
+	uint32_t offset = 0;
 	struct nvme_request *child;
 
 	while (remaining_lba_count > 0) {
@@ -102,7 +106,7 @@ _nvme_ns_cmd_split_request(struct nvme_namespace *ns, void *payload,
 		lba_count = nvme_min(remaining_lba_count, lba_count);
 
 		child = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn,
-					cb_arg, opc);
+					cb_arg, opc, reset_sgl_fn, next_sge_fn);
 		if (child == NULL) {
 			nvme_free_request(req);
 			return NULL;
@@ -110,7 +114,11 @@ _nvme_ns_cmd_split_request(struct nvme_namespace *ns, void *payload,
 		nvme_request_add_child(req, child);
 		remaining_lba_count -= lba_count;
 		lba += lba_count;
-		payload = (void *)((uintptr_t)payload + (lba_count * sector_size));
+		if (req->u.payload == NULL) {
+			child->sgl_offset = offset;
+			offset += lba_count * ns->sector_size;
+		} else
+			payload = (void *)((uintptr_t)payload + (lba_count * sector_size));
 	}
 
 	return req;
@@ -119,7 +127,8 @@
 static struct nvme_request *
 _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg,
-		uint32_t opc)
+		uint32_t opc, nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		nvme_req_next_sge_fn_t next_sge_fn)
 {
 	struct nvme_request	*req;
 	struct nvme_command	*cmd;
@@ -137,6 +146,9 @@ _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 		return NULL;
 	}
 
+	req->reset_sgl_fn = reset_sgl_fn;
+	req->next_sge_fn = next_sge_fn;
+
 	/*
 	 * Intel DC P3*00 NVMe controllers benefit from driver-assisted striping.
 	 * If this controller defines a stripe boundary and this I/O spans a stripe
@@ -147,10 +159,12 @@ _nvme_ns_cmd_rw(struct nvme_namespace *ns, void *payload, uint64_t lba,
 	if (sectors_per_stripe > 0 &&
 	    (((lba & (sectors_per_stripe - 1)) + lba_count) > sectors_per_stripe)) {
 		return _nvme_ns_cmd_split_request(ns, payload, lba, lba_count, cb_fn, cb_arg, opc,
-						  req, sectors_per_stripe, sectors_per_stripe - 1);
+						  req, sectors_per_stripe, sectors_per_stripe - 1,
+						  reset_sgl_fn, next_sge_fn);
 	} else if (lba_count > sectors_per_max_io) {
 		return _nvme_ns_cmd_split_request(ns, payload, lba, lba_count, cb_fn, cb_arg, opc,
-						  req, sectors_per_max_io, 0);
+						  req, sectors_per_max_io, 0,
+						  reset_sgl_fn, next_sge_fn);
 	} else {
 		cmd = &req->cmd;
 		cmd->opc = opc;
@@ -170,7 +184,25 @@ nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload, uint64_t lba,
 {
 	struct nvme_request *req;
 
-	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_READ);
+	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_READ, NULL, NULL);
+	if (req != NULL) {
+		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
+		return 0;
+	} else {
+		return ENOMEM;
+	}
+}
+
+int
+nvme_ns_cmd_readv(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		  nvme_cb_fn_t cb_fn, void *cb_arg,
+		  nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		  nvme_req_next_sge_fn_t next_sge_fn)
+{
+	struct nvme_request *req;
+
+	req = _nvme_ns_cmd_rw(ns, NULL, lba, lba_count, cb_fn, cb_arg, NVME_OPC_READ, reset_sgl_fn,
+			      next_sge_fn);
 	if (req != NULL) {
 		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 		return 0;
@@ -185,7 +217,25 @@ nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload, uint64_t lba,
 {
 	struct nvme_request *req;
 
-	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_WRITE);
+	req = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn, cb_arg, NVME_OPC_WRITE, NULL, NULL);
+	if (req != NULL) {
+		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
+		return 0;
+	} else {
+		return ENOMEM;
+	}
+}
+
+int
+nvme_ns_cmd_writev(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		   nvme_cb_fn_t cb_fn, void *cb_arg,
+		   nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		   nvme_req_next_sge_fn_t next_sge_fn)
+{
+	struct nvme_request *req;
+
+	req = _nvme_ns_cmd_rw(ns, NULL, lba, lba_count, cb_fn, cb_arg, NVME_OPC_WRITE, reset_sgl_fn,
+			      next_sge_fn);
 	if (req != NULL) {
 		nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 		return 0;
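A concrete case makes the page-count arithmetic in _nvme_qpair_build_sgl_request() (next file) easier to check. This standalone sketch mirrors that computation under the driver's 4KB PAGE_SIZE; the helper name and test values are mine, not the patch's:

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 0x1000u

/* Number of 4KB pages an SGE spans, mirroring the driver's
 * nseg/modulo/unaligned computation. */
static uint32_t
sge_page_count(uint64_t phys_addr, uint32_t len)
{
	uint32_t nseg = len / PAGE_SIZE;
	uint32_t modulo = len & (PAGE_SIZE - 1);
	uint32_t unaligned = phys_addr & (PAGE_SIZE - 1);

	if (modulo || unaligned)
		nseg += 1 + ((modulo + unaligned - 1) / PAGE_SIZE);
	return nseg;
}

int
main(void)
{
	/* 12KB starting 0x200 into a page touches 4 pages... */
	assert(sge_page_count(0x100200, 0x3000) == 4);
	/* ...while the same 12KB page-aligned touches exactly 3. */
	assert(sge_page_count(0x100000, 0x3000) == 3);
	return 0;
}

The unaligned-start case is also why the function rejects a request whose interior SGE is not page-aligned: every PRP list entry after the first must be a page-aligned address.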
diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c
index 6f23d91a2..dea80621c 100644
--- a/lib/nvme/nvme_qpair.c
+++ b/lib/nvme/nvme_qpair.c
@@ -668,9 +668,94 @@ _nvme_fail_request_ctrlr_failed(struct nvme_qpair *qpair, struct nvme_request *r
 				NVME_SC_ABORTED_BY_REQUEST, true);
 }
 
+static int
+_nvme_qpair_build_sgl_request(struct nvme_qpair *qpair, struct nvme_request *req,
+			      struct nvme_tracker *tr)
+{
+	int rc;
+	uint64_t phys_addr;
+	uint32_t data_transferred, remaining_transfer_len, length;
+	uint32_t nseg, cur_nseg, total_nseg, last_nseg, modulo, unaligned;
+	uint32_t sge_count = 0;
+	uint64_t prp2 = 0;
+	struct nvme_request *parent;
+
+	/*
+	 * Build scattered payloads.
+	 */
+
+	parent = req->parent ? req->parent : req;
+	nvme_assert(req->reset_sgl_fn != NULL, ("sgl reset callback required\n"));
+	req->reset_sgl_fn(parent->cb_arg, req->sgl_offset);
+
+	remaining_transfer_len = req->payload_size;
+	total_nseg = 0;
+	last_nseg = 0;
+
+	while (remaining_transfer_len > 0) {
+		nvme_assert(req->next_sge_fn != NULL, ("sgl callback required\n"));
+		rc = req->next_sge_fn(parent->cb_arg, &phys_addr, &length);
+		if (rc)
+			return -1;
+
+		data_transferred = nvme_min(remaining_transfer_len, length);
+
+		nseg = data_transferred >> nvme_u32log2(PAGE_SIZE);
+		modulo = data_transferred & (PAGE_SIZE - 1);
+		unaligned = phys_addr & (PAGE_SIZE - 1);
+		if (modulo || unaligned) {
+			nseg += 1 + ((modulo + unaligned - 1) >> nvme_u32log2(PAGE_SIZE));
+		}
+
+		if (total_nseg == 0) {
+			req->cmd.psdt = NVME_PSDT_PRP;
+			req->cmd.dptr.prp.prp1 = phys_addr;
+		}
+
+		total_nseg += nseg;
+		sge_count++;
+		remaining_transfer_len -= data_transferred;
+
+		if (total_nseg == 2) {
+			if (sge_count == 1)
+				tr->req->cmd.dptr.prp.prp2 = phys_addr + PAGE_SIZE - unaligned;
+			else if (sge_count == 2)
+				tr->req->cmd.dptr.prp.prp2 = phys_addr;
+			/* save prp2 value */
+			prp2 = tr->req->cmd.dptr.prp.prp2;
+		} else if (total_nseg > 2) {
+			if (sge_count == 1)
+				cur_nseg = 1;
+			else
+				cur_nseg = 0;
+
+			tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_bus_addr;
+			while (cur_nseg < nseg) {
+				if (prp2) {
+					tr->prp[0] = prp2;
+					tr->prp[last_nseg + 1] = phys_addr + cur_nseg * PAGE_SIZE - unaligned;
+				} else
+					tr->prp[last_nseg] = phys_addr + cur_nseg * PAGE_SIZE - unaligned;
+
+				last_nseg++;
+				cur_nseg++;
+
+				/* physical address and length check */
+				if (remaining_transfer_len || (!remaining_transfer_len && (cur_nseg < nseg))) {
+					if ((length & (PAGE_SIZE - 1)) || unaligned)
+						return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
 void
 nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 {
+	int rc;
 	struct nvme_tracker	*tr;
 	struct nvme_request	*child_req;
 	uint64_t phys_addr;
@@ -718,7 +803,7 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 	tr->req = req;
 	req->cmd.cid = tr->cid;
 
-	if (req->payload_size) {
+	if (req->u.payload) {
 		/*
 		 * Build PRP list describing payload buffer.
 		 */
@@ -754,6 +839,12 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 				cur_nseg++;
 			}
 		}
+	} else if (req->u.payload == NULL && req->payload_size != 0) {
+		rc = _nvme_qpair_build_sgl_request(qpair, req, tr);
+		if (rc < 0) {
+			_nvme_fail_request_bad_vtophys(qpair, tr);
+			return;
+		}
 	}
 
 	nvme_qpair_submit_tracker(qpair, tr);
diff --git a/test/lib/nvme/Makefile b/test/lib/nvme/Makefile
index c0bcd4f34..beabcca6e 100644
--- a/test/lib/nvme/Makefile
+++ b/test/lib/nvme/Makefile
@@ -34,7 +34,7 @@
 SPDK_ROOT_DIR := $(CURDIR)/../../..
 include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
 
-DIRS-y = unit aer reset
+DIRS-y = unit aer reset sgl
 
 .PHONY: all clean $(DIRS-y)
 
diff --git a/test/lib/nvme/nvme.sh b/test/lib/nvme/nvme.sh
index 15ff7bf74..e8557e78a 100755
--- a/test/lib/nvme/nvme.sh
+++ b/test/lib/nvme/nvme.sh
@@ -33,5 +33,8 @@
 timing_enter reset
 $testdir/reset/reset -q 64 -w write -s 4096 -t 15
 timing_exit reset
+timing_enter sgl
+$testdir/sgl/sgl
+timing_exit sgl
 
 timing_exit nvme
diff --git a/test/lib/nvme/sgl/Makefile b/test/lib/nvme/sgl/Makefile
new file mode 100644
index 000000000..c196481d6
--- /dev/null
+++ b/test/lib/nvme/sgl/Makefile
@@ -0,0 +1,57 @@
+#
+#  BSD LICENSE
+#
+#  Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(CURDIR)/../../../..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+APP = sgl
+
+C_SRCS := nvme_sgl.c
+
+CFLAGS += -I. $(DPDK_INC)
+
+SPDK_LIBS += $(SPDK_ROOT_DIR)/lib/nvme/libspdk_nvme.a \
+	     $(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \
+	     $(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a
+
+LIBS += $(SPDK_LIBS) -lpciaccess -lpthread $(DPDK_LIB) -lrt
+
+all : $(APP)
+
+$(APP) : $(OBJS) $(SPDK_LIBS)
+	$(LINK_C)
+
+clean :
+	$(CLEAN_C) $(APP)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/test/lib/nvme/sgl/nvme_sgl.c b/test/lib/nvme/sgl/nvme_sgl.c
new file mode 100644
index 000000000..67d728a8c
--- /dev/null
+++ b/test/lib/nvme/sgl/nvme_sgl.c
@@ -0,0 +1,478 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/uio.h> /* for struct iovec */
+
+#include <pciaccess.h>
+
+#include <rte_config.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+
+#include "spdk/nvme.h"
+#include "spdk/pci.h"
+
+struct rte_mempool *request_mempool;
+
+#define MAX_DEVS 64
+
+#define MAX_IOVS 128
+
+#define DATA_PATTERN 0x5A
+
+#define BASE_LBA_START 0x100000
+
+struct dev {
+	struct pci_device	*pci_dev;
+	struct nvme_controller	*ctrlr;
+	char			name[100];
+};
+
+static struct dev devs[MAX_DEVS];
+static int num_devs = 0;
+
+#define foreach_dev(iter) \
+	for (iter = devs; iter - devs < num_devs; iter++)
+
+static int io_complete_flag = 0;
+
+struct io_request {
+	int current_iov_index;
+	uint32_t current_iov_bytes_left;
+	struct iovec iovs[MAX_IOVS];
+	int nseg;
+};
+
+static void nvme_request_reset_sgl(void *cb_arg, uint32_t sgl_offset)
+{
+	int i;
+	uint32_t offset = 0;
+	struct iovec *iov;
+	struct io_request *req = (struct io_request *)cb_arg;
+
+	/* Find the iovec containing payload byte sgl_offset and remember
+	 * how many bytes of it are still left to transfer. */
+	for (i = 0; i < req->nseg; i++) {
+		iov = &req->iovs[i];
+		offset += iov->iov_len;
+		if (offset > sgl_offset)
+			break;
+	}
+	req->current_iov_index = i;
+	req->current_iov_bytes_left = offset - sgl_offset;
+	return;
+}
+
+static int nvme_request_next_sge(void *cb_arg, uint64_t *address, uint32_t *length)
+{
+	struct io_request *req = (struct io_request *)cb_arg;
+	struct iovec *iov;
+
+	if (req->current_iov_index >= req->nseg) {
+		*length = 0;
+		*address = 0;
+		return 0;
+	}
+
+	iov = &req->iovs[req->current_iov_index];
+
+	if (req->current_iov_bytes_left) {
+		/* Resuming partway into this iovec after a reset. */
+		*address = rte_malloc_virt2phy(iov->iov_base) + iov->iov_len - req->current_iov_bytes_left;
+		*length = req->current_iov_bytes_left;
+		req->current_iov_bytes_left = 0;
+	} else {
+		*address = rte_malloc_virt2phy(iov->iov_base);
+		*length = iov->iov_len;
+	}
+
+	req->current_iov_index++;
+
+	return 0;
+}
+
+static void
+io_complete(void *ctx, const struct nvme_completion *cpl)
+{
+	if (nvme_completion_is_error(cpl))
+		io_complete_flag = 2;
+	else
+		io_complete_flag = 1;
+}
+
+static uint32_t build_io_request_1(struct io_request *req)
+{
+	int i, found = 0;
+	uint8_t *buf;
+	uint64_t v_addr;
+	uint32_t len = 0;
+
+	req->nseg = 3;
+
+	/* 2KB for 1st sge; make sure the iov address starts at a 0x800
+	 * boundary and ends on a 0x1000 boundary */
+	for (i = 0; i < 8; i++) {
+		buf = rte_zmalloc(NULL, 0x800, 0x800);
+		v_addr = (uint64_t)buf;
+		if (v_addr & 0x800ULL) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return 0;
+	req->iovs[0].iov_base = buf;
+	req->iovs[0].iov_len = 0x800;
+
+	/* 4KB for 2nd sge */
+	req->iovs[1].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[1].iov_len = 0x1000;
+
+	/* 12KB for 3rd sge */
+	req->iovs[2].iov_base = rte_zmalloc(NULL, 0x3000, 0x1000);
+	req->iovs[2].iov_len = 0x3000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_2(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 32;
+
+	/* 4KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[0].iov_len = 0x1000;
+
+	/* 8KB for each of the remaining 31 sges */
+	for (i = 1; i < req->nseg; i++) {
+		req->iovs[i].iov_base = rte_zmalloc(NULL, 0x2000, 0x1000);
+		req->iovs[i].iov_len = 0x2000;
+	}
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_3(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 1;
+
+	/* 8KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x2000, 0x1000);
+	req->iovs[0].iov_len = 0x2000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_4(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 2;
+
+	/* 4KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[0].iov_len = 0x1000;
+
+	/* 4KB for 2nd sge */
+	req->iovs[1].iov_base = rte_zmalloc(NULL, 0x1000, 0x1000);
+	req->iovs[1].iov_len = 0x1000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_5(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 1;
+
+	/* 256KB for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x40000, 0x1000);
+	req->iovs[0].iov_len = 0x40000;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+static uint32_t build_io_request_6(struct io_request *req)
+{
+	int i;
+	uint32_t len = 0;
+
+	req->nseg = 1;
+
+	/* 512B for 1st sge */
+	req->iovs[0].iov_base = rte_zmalloc(NULL, 0x200, 0x200);
+	req->iovs[0].iov_len = 0x200;
+
+	for (i = 0; i < req->nseg; i++)
+		len += req->iovs[i].iov_len;
+
+	return len;
+}
+
+typedef uint32_t (*nvme_build_io_req_fn_t)(struct io_request *req);
+
+static int
+writev_readv_tests(struct dev *dev, nvme_build_io_req_fn_t build_io_fn)
+{
+	int rc = 0;
+	uint32_t len, lba_count;
+	uint32_t i, j, nseg;
+	char *buf;
+
+	struct io_request *req;
+	struct nvme_namespace *ns;
+	const struct nvme_namespace_data *nsdata;
+
+	ns = nvme_ctrlr_get_ns(dev->ctrlr, 1);
+	if (!ns)
+		return -1;
+	nsdata = nvme_ns_get_data(ns);
+	if (!nsdata || !nvme_ns_get_sector_size(ns))
+		return -1;
+
+	req = rte_zmalloc(NULL, sizeof(*req), 0);
+	if (!req)
+		return -1;
+
+	/* IO parameters setting */
+	len = build_io_fn(req);
+	if (!len) {
+		rte_free(req);
+		return 0;
+	}
+
+	lba_count = len / nvme_ns_get_sector_size(ns);
+	if (BASE_LBA_START + lba_count > (uint32_t)nsdata->nsze) {
+		rte_free(req);
+		return -1;
+	}
+
+	nseg = req->nseg;
+	for (i = 0; i < nseg; i++) {
+		memset(req->iovs[i].iov_base, DATA_PATTERN, req->iovs[i].iov_len);
+	}
+
+	rc = nvme_ns_cmd_writev(ns, BASE_LBA_START, lba_count,
+				io_complete, req,
+				nvme_request_reset_sgl,
+				nvme_request_next_sge);
+
+	if (rc != 0) {
+		fprintf(stderr, "Writev Failed\n");
+		rte_free(req);
+		return -1;
+	}
+
+	io_complete_flag = 0;
+
+	while (!io_complete_flag)
+		nvme_ctrlr_process_io_completions(dev->ctrlr, 1);
+
+	if (io_complete_flag != 1) {
+		fprintf(stderr, "%s Writev Failed\n", dev->name);
+		rte_free(req);
+		return -1;
+	}
+
+	/* reset completion flag */
+	io_complete_flag = 0;
+
+	for (i = 0; i < nseg; i++) {
+		memset(req->iovs[i].iov_base, 0, req->iovs[i].iov_len);
+	}
+
+	rc = nvme_ns_cmd_readv(ns, BASE_LBA_START, lba_count,
+			       io_complete, req,
+			       nvme_request_reset_sgl,
+			       nvme_request_next_sge);
+
+	if (rc != 0) {
+		fprintf(stderr, "Readv Failed\n");
+		rte_free(req);
+		return -1;
+	}
+
+	while (!io_complete_flag)
+		nvme_ctrlr_process_io_completions(dev->ctrlr, 1);
+
+	if (io_complete_flag != 1) {
+		fprintf(stderr, "%s Readv Failed\n", dev->name);
+		rte_free(req);
+		return -1;
+	}
+
+	for (i = 0; i < nseg; i++) {
+		buf = (char *)req->iovs[i].iov_base;
+		for (j = 0; j < req->iovs[i].iov_len; j++) {
+			if (buf[j] != DATA_PATTERN) {
+				fprintf(stderr, "Write/Read Success, but %s Memcmp Failed\n", dev->name);
+				rte_free(req);
+				return -1;
+			}
+		}
+	}
+
+	fprintf(stdout, "%s %s Test Passed\n", dev->name, __func__);
+	rte_free(req);
+	return rc;
+}
+
+static const char *ealargs[] = {
+	"nvme_sgl",
+	"-c 0x1",
+	"-n 4",
+};
+
+int main(int argc, char **argv)
+{
+	struct pci_device_iterator	*pci_dev_iter;
+	struct pci_device		*pci_dev;
+	struct dev			*iter;
+	struct pci_id_match		match;
+	int				rc, i;
+
+	printf("NVMe Readv/Writev Request test\n");
+
+	rc = rte_eal_init(sizeof(ealargs) / sizeof(ealargs[0]),
+			  (char **)(void *)(uintptr_t)ealargs);
+
+	if (rc < 0) {
+		fprintf(stderr, "could not initialize dpdk\n");
+		exit(1);
+	}
+
+	request_mempool = rte_mempool_create("nvme_request", 8192,
+					     nvme_request_size(), 128, 0,
+					     NULL, NULL, NULL, NULL,
+					     SOCKET_ID_ANY, 0);
+
+	if (request_mempool == NULL) {
+		fprintf(stderr, "could not initialize request mempool\n");
+		exit(1);
+	}
+
+	pci_system_init();
+
+	match.vendor_id = PCI_MATCH_ANY;
+	match.subvendor_id = PCI_MATCH_ANY;
+	match.subdevice_id = PCI_MATCH_ANY;
+	match.device_id = PCI_MATCH_ANY;
+	match.device_class = NVME_CLASS_CODE;
+	match.device_class_mask = 0xFFFFFF;
+
+	pci_dev_iter = pci_id_match_iterator_create(&match);
+
+	rc = 0;
+	while ((pci_dev = pci_device_next(pci_dev_iter))) {
+		struct dev *dev;
+
+		if (pci_device_has_non_uio_driver(pci_dev)) {
+			fprintf(stderr, "non-null kernel driver attached to nvme\n");
+			fprintf(stderr, " controller at pci bdf %d:%d:%d\n",
+				pci_dev->bus, pci_dev->dev, pci_dev->func);
+			fprintf(stderr, " skipping...\n");
+			continue;
+		}
+
+		pci_device_probe(pci_dev);
+
+		/* add to dev list */
+		dev = &devs[num_devs++];
+
+		dev->pci_dev = pci_dev;
+
+		snprintf(dev->name, sizeof(dev->name), "%04X:%02X:%02X.%02X",
+			 pci_dev->domain, pci_dev->bus, pci_dev->dev, pci_dev->func);
+
+		printf("%s: attaching NVMe driver...\n", dev->name);
+
+		dev->ctrlr = nvme_attach(pci_dev);
+		if (dev->ctrlr == NULL) {
+			fprintf(stderr, "failed to attach to NVMe controller %s\n", dev->name);
+			rc = 1;
+			continue; /* TODO: just abort */
+		}
+	}
+	pci_iterator_destroy(pci_dev_iter);
+
+	if (num_devs) {
+		rc = nvme_register_io_thread();
+		if (rc != 0)
+			return rc;
+	}
+
+	foreach_dev(iter) {
+		if (writev_readv_tests(iter, build_io_request_1)
+		    || writev_readv_tests(iter, build_io_request_2)
+		    || writev_readv_tests(iter, build_io_request_3)
+		    || writev_readv_tests(iter, build_io_request_4)
+		    || writev_readv_tests(iter, build_io_request_5)
+		    || writev_readv_tests(iter, build_io_request_6)) {
+			printf("%s: failed sgl tests\n", iter->name);
+		}
+	}
+
+	printf("Cleaning up...\n");
+
+	for (i = 0; i < num_devs; i++) {
+		struct dev *dev = &devs[i];
+
+		nvme_detach(dev->ctrlr);
+	}
+
+	if (num_devs)
+		nvme_unregister_io_thread();
+
+	return rc;
+}