diff --git a/include/spdk/nvmf.h b/include/spdk/nvmf.h
index 66462b403..81187aa26 100644
--- a/include/spdk/nvmf.h
+++ b/include/spdk/nvmf.h
@@ -67,6 +67,7 @@ struct spdk_nvmf_tgt_opts {
 	uint32_t in_capsule_data_size;
 	uint32_t max_io_size;
 	uint32_t max_subsystems;
+	uint32_t io_unit_size;
 };
 
 /**
  * Initialize the default value of opts.
diff --git a/lib/event/subsystems/nvmf/conf.c b/lib/event/subsystems/nvmf/conf.c
index ca53e0a5c..8f6dea431 100644
--- a/lib/event/subsystems/nvmf/conf.c
+++ b/lib/event/subsystems/nvmf/conf.c
@@ -72,6 +72,7 @@ spdk_nvmf_read_config_file_params(struct spdk_conf_section *sp,
 	int max_queues_per_sess;
 	int in_capsule_data_size;
 	int max_io_size;
+	int io_unit_size;
 	int acceptor_poll_rate;
 
 	max_queue_depth = spdk_conf_section_get_intval(sp, "MaxQueueDepth");
@@ -94,6 +95,11 @@ spdk_nvmf_read_config_file_params(struct spdk_conf_section *sp,
 		opts->max_io_size = max_io_size;
 	}
 
+	io_unit_size = spdk_conf_section_get_intval(sp, "IOUnitSize");
+	if (io_unit_size >= 0) {
+		opts->io_unit_size = io_unit_size;
+	}
+
 	acceptor_poll_rate = spdk_conf_section_get_intval(sp, "AcceptorPollRate");
 	if (acceptor_poll_rate >= 0) {
 		g_spdk_nvmf_tgt_conf.acceptor_poll_rate = acceptor_poll_rate;
diff --git a/lib/nvmf/ctrlr_bdev.c b/lib/nvmf/ctrlr_bdev.c
index 93c7423dc..3db87487e 100644
--- a/lib/nvmf/ctrlr_bdev.c
+++ b/lib/nvmf/ctrlr_bdev.c
@@ -180,8 +180,8 @@ nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
 	}
 
 	spdk_trace_record(TRACE_NVMF_LIB_READ_START, 0, 0, (uint64_t)req, 0);
-	if (spdk_unlikely(spdk_bdev_read_blocks(desc, ch, req->data, start_lba, num_blocks,
-						nvmf_bdev_ctrlr_complete_cmd, req))) {
+	if (spdk_unlikely(spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
+						 nvmf_bdev_ctrlr_complete_cmd, req))) {
 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
@@ -219,7 +219,7 @@ nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
 	}
 
 	spdk_trace_record(TRACE_NVMF_LIB_WRITE_START, 0, 0, (uint64_t)req, 0);
-	if (spdk_unlikely(spdk_bdev_write_blocks(desc, ch, req->data, start_lba, num_blocks,
+	if (spdk_unlikely(spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
 						  nvmf_bdev_ctrlr_complete_cmd, req))) {
 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
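For orientation, a minimal sketch of how a target application would opt into the new option, assuming only the public `spdk_nvmf_tgt_opts_init()`/`spdk_nvmf_tgt_create()` flow visible in this patch (the helper name and the 8192 value are illustrative; 8192 mirrors the test config later in this patch):

```c
#include "spdk/nvmf.h"

/* Sketch: start from the defaults, then shrink the I/O unit so the
 * default 128 KiB max I/O is carried by sixteen pooled 8 KiB buffers.
 * io_unit_size must divide max_io_size evenly and may not require
 * more than SPDK_NVMF_MAX_SGL_ENTRIES (16) buffers per request,
 * or spdk_nvmf_tgt_create() returns NULL. */
static struct spdk_nvmf_tgt *
create_tgt_with_small_io_unit(void)
{
	struct spdk_nvmf_tgt_opts opts;

	spdk_nvmf_tgt_opts_init(&opts);
	opts.io_unit_size = 8192;

	return spdk_nvmf_tgt_create(&opts);
}
```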
diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c
index eb04f5c5e..27a60e36d 100644
--- a/lib/nvmf/nvmf.c
+++ b/lib/nvmf/nvmf.c
@@ -51,6 +51,7 @@ SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
 #define SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
 #define SPDK_NVMF_DEFAULT_MAX_IO_SIZE 131072
 #define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
+#define SPDK_NVMF_DEFAULT_IO_UNIT_SIZE 131072
 
 void
 spdk_nvmf_tgt_opts_init(struct spdk_nvmf_tgt_opts *opts)
@@ -60,6 +61,7 @@ spdk_nvmf_tgt_opts_init(struct spdk_nvmf_tgt_opts *opts)
 	opts->in_capsule_data_size = SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE;
 	opts->max_io_size = SPDK_NVMF_DEFAULT_MAX_IO_SIZE;
 	opts->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS;
+	opts->io_unit_size = SPDK_NVMF_DEFAULT_IO_UNIT_SIZE;
 }
 
 static int
@@ -165,6 +167,15 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_tgt_opts *opts)
 		tgt->opts = *opts;
 	}
 
+	if ((tgt->opts.io_unit_size == 0) ||
+	    (tgt->opts.max_io_size % tgt->opts.io_unit_size != 0) ||
+	    (tgt->opts.max_io_size / tgt->opts.io_unit_size > SPDK_NVMF_MAX_SGL_ENTRIES)) {
+		SPDK_ERRLOG("Unsupported IO size, MaxIO:%d, UnitIO:%d\n", tgt->opts.max_io_size,
+			    tgt->opts.io_unit_size);
+		free(tgt);
+		return NULL;
+	}
+
 	tgt->discovery_genctr = 0;
 	tgt->discovery_log_page = NULL;
 	tgt->discovery_log_page_size = 0;
@@ -187,6 +198,7 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_tgt_opts *opts)
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Max In Capsule Data: %d bytes\n",
 		      tgt->opts.in_capsule_data_size);
 	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Max I/O Size: %d bytes\n", tgt->opts.max_io_size);
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "I/O Unit Size: %d bytes\n", tgt->opts.io_unit_size);
 
 	return tgt;
 }
diff --git a/lib/nvmf/nvmf_internal.h b/lib/nvmf/nvmf_internal.h
index a0601b9ee..8958449fd 100644
--- a/lib/nvmf/nvmf_internal.h
+++ b/lib/nvmf/nvmf_internal.h
@@ -43,6 +43,8 @@
 #include "spdk/queue.h"
 #include "spdk/util.h"
 
+#define SPDK_NVMF_MAX_SGL_ENTRIES	16
+
 enum spdk_nvmf_subsystem_state {
 	SPDK_NVMF_SUBSYSTEM_INACTIVE = 0,
 	SPDK_NVMF_SUBSYSTEM_ACTIVATING,
@@ -138,6 +140,8 @@ struct spdk_nvmf_request {
 	void				*data;
 	union nvmf_h2c_msg		*cmd;
 	union nvmf_c2h_msg		*rsp;
+	struct iovec			iov[SPDK_NVMF_MAX_SGL_ENTRIES];
+	uint32_t			iovcnt;
 	TAILQ_ENTRY(spdk_nvmf_request)	link;
 };
 
diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c
index 4ab392610..9b39cb1fe 100644
--- a/lib/nvmf/rdma.c
+++ b/lib/nvmf/rdma.c
@@ -55,6 +55,7 @@
  */
#define NVMF_DEFAULT_TX_SGE		1
 #define NVMF_DEFAULT_RX_SGE		2
+#define NVMF_DEFAULT_DATA_SGE		16
 
 /* The RDMA completion queue size */
 #define NVMF_RDMA_CQ_SIZE	4096
@@ -129,7 +130,7 @@ struct spdk_nvmf_rdma_recv {
 
 struct spdk_nvmf_rdma_request {
 	struct spdk_nvmf_request		req;
-	void					*data_from_pool;
+	bool					data_from_pool;
 
 	enum spdk_nvmf_rdma_request_state	state;
@@ -142,7 +143,8 @@ struct spdk_nvmf_rdma_request {
 
 	struct {
 		struct ibv_send_wr		wr;
-		struct ibv_sge			sgl[NVMF_DEFAULT_TX_SGE];
+		struct ibv_sge			sgl[SPDK_NVMF_MAX_SGL_ENTRIES];
+		void				*buffers[SPDK_NVMF_MAX_SGL_ENTRIES];
 	} data;
 
 	TAILQ_ENTRY(spdk_nvmf_rdma_request)	link;
@@ -259,6 +261,7 @@ struct spdk_nvmf_rdma_transport {
 
 	uint16_t				max_queue_depth;
 	uint32_t				max_io_size;
+	uint32_t				io_unit_size;
 	uint32_t				in_capsule_data_size;
 
 	TAILQ_HEAD(, spdk_nvmf_rdma_device)	devices;
@@ -345,7 +348,7 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 	attr.recv_cq		= rqpair->poller->cq;
 	attr.cap.max_send_wr	= rqpair->max_queue_depth * 2; /* SEND, READ, and WRITE operations */
 	attr.cap.max_recv_wr	= rqpair->max_queue_depth; /* RECV operations */
-	attr.cap.max_send_sge	= NVMF_DEFAULT_TX_SGE;
+	attr.cap.max_send_sge	= SPDK_NVMF_MAX_SGL_ENTRIES;
 	attr.cap.max_recv_sge	= NVMF_DEFAULT_RX_SGE;
 
 	rc = rdma_create_qp(rqpair->cm_id, NULL, &attr);
@@ -840,6 +843,55 @@ spdk_nvmf_rdma_request_get_xfer(struct spdk_nvmf_rdma_request *rdma_req)
 	return xfer;
 }
 
+static int
+spdk_nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,
+				 struct spdk_nvmf_rdma_device *device,
+				 struct spdk_nvmf_rdma_request *rdma_req)
+{
+	void		*buf = NULL;
+	uint32_t	length = rdma_req->req.length;
+	uint32_t	i = 0;
+
+	rdma_req->req.iovcnt = 0;
+	while (length) {
+		buf = spdk_mempool_get(rtransport->data_buf_pool);
+		if (!buf) {
+			goto nomem;
+		}
+
+		rdma_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) &
+						~NVMF_DATA_BUFFER_MASK);
+		rdma_req->req.iov[i].iov_len = spdk_min(length, rtransport->io_unit_size);
+		rdma_req->req.iovcnt++;
+		rdma_req->data.buffers[i] = buf;
+		rdma_req->data.wr.sg_list[i].addr = (uintptr_t)(rdma_req->req.iov[i].iov_base);
+		rdma_req->data.wr.sg_list[i].length = rdma_req->req.iov[i].iov_len;
+		rdma_req->data.wr.sg_list[i].lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map,
+						     (uint64_t)buf))->lkey;
+
+		length -= rdma_req->req.iov[i].iov_len;
+		i++;
+	}
+
+	rdma_req->data_from_pool = true;
+
+	return 0;
+
+nomem:
+	while (i) {
+		i--;
+		spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data.buffers[i]);
+		rdma_req->req.iov[i].iov_base = NULL;
+		rdma_req->req.iov[i].iov_len = 0;
+
+		rdma_req->data.wr.sg_list[i].addr = 0;
+		rdma_req->data.wr.sg_list[i].length = 0;
+		rdma_req->data.wr.sg_list[i].lkey = 0;
+	}
+	rdma_req->req.iovcnt = 0;
+	return -ENOMEM;
+}
+
 static int
 spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
 				 struct spdk_nvmf_rdma_device *device,
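The pointer round-up in `spdk_nvmf_rdma_request_fill_iovs()` is why each pool element reserves `NVMF_DATA_BUFFER_ALIGNMENT` bytes of slack (see the mempool change below): `iov_base` is the raw pool pointer bumped to the next aligned boundary. A self-contained sketch of the same round-up idiom, assuming the conventional mask = alignment - 1 relationship and SPDK's 4 KiB data-buffer alignment:

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed to mirror rdma.c: 4 KiB alignment, mask = alignment - 1. */
#define DATA_BUFFER_ALIGNMENT	4096u
#define DATA_BUFFER_MASK	(DATA_BUFFER_ALIGNMENT - 1)

int
main(void)
{
	uintptr_t raw = 0x1234567;	/* arbitrary unaligned pool address */
	uintptr_t aligned = (raw + DATA_BUFFER_MASK) & ~(uintptr_t)DATA_BUFFER_MASK;

	/* Prints 0x1234567 -> 0x1235000: rounded up to the next 4 KiB
	 * boundary, at most DATA_BUFFER_MASK bytes past the raw pointer. */
	printf("%#lx -> %#lx\n", (unsigned long)raw, (unsigned long)aligned);
	return 0;
}
```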
@@ -863,26 +915,25 @@ spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
 			return -1;
 		}
 
+		/* fill request length and populate iovs */
 		rdma_req->req.length = sgl->keyed.length;
-		rdma_req->data_from_pool = spdk_mempool_get(rtransport->data_buf_pool);
-		if (!rdma_req->data_from_pool) {
+
+		if (spdk_nvmf_rdma_request_fill_iovs(rtransport, device, rdma_req) < 0) {
 			/* No available buffers. Queue this request up. */
 			SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req);
 			return 0;
 		}
-		/* AIO backend requires block size aligned data buffers,
-		 * 4KiB aligned data buffer should work for most devices.
-		 */
-		rdma_req->req.data = (void *)((uintptr_t)(rdma_req->data_from_pool + NVMF_DATA_BUFFER_MASK)
-					      & ~NVMF_DATA_BUFFER_MASK);
-		rdma_req->data.sgl[0].addr = (uintptr_t)rdma_req->req.data;
-		rdma_req->data.sgl[0].length = sgl->keyed.length;
-		rdma_req->data.sgl[0].lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map,
-					      (uint64_t)rdma_req->req.data))->lkey;
+
+		/* backward compatible */
+		rdma_req->req.data = rdma_req->req.iov[0].iov_base;
+
+		/* rdma wr specifics */
+		rdma_req->data.wr.num_sge = rdma_req->req.iovcnt;
 		rdma_req->data.wr.wr.rdma.rkey = sgl->keyed.key;
 		rdma_req->data.wr.wr.rdma.remote_addr = sgl->address;
 
-		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took buffer from central pool\n", rdma_req);
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req,
+			      rdma_req->req.iovcnt);
 
 		return 0;
 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
@@ -909,8 +960,13 @@ spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
 		}
 
 		rdma_req->req.data = rdma_req->recv->buf + offset;
-		rdma_req->data_from_pool = NULL;
+		rdma_req->data_from_pool = false;
 		rdma_req->req.length = sgl->unkeyed.length;
+
+		rdma_req->req.iov[0].iov_base = rdma_req->req.data;
+		rdma_req->req.iov[0].iov_len = rdma_req->req.length;
+		rdma_req->req.iovcnt = 1;
+
 		return 0;
 	}
 
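A worked example for the keyed-SGL path (the numbers are mine, not from the patch): with IOUnitSize 8192, a 20480-byte keyed SGL is filled as three iovecs of 8192, 8192, and 4096 bytes, since each loop iteration takes spdk_min(length, io_unit_size). data.wr.num_sge then becomes 3, and the single RDMA work request scatters across three pooled buffers. The in-capsule path stays contiguous, so it is expressed as a single iovec covering the capsule buffer.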
@@ -998,7 +1054,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 		/* If data is transferring from host to controller and the data didn't
 		 * arrive using in capsule data, we need to do a transfer from the host.
 		 */
-		if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && rdma_req->data_from_pool != NULL) {
+		if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && rdma_req->data_from_pool) {
 			rdma_req->state = RDMA_REQUEST_STATE_TRANSFER_PENDING_HOST_TO_CONTROLLER;
 			TAILQ_INSERT_TAIL(&rqpair->pending_rdma_rw_queue, rdma_req, link);
 			break;
@@ -1068,11 +1124,16 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 		rqpair->cur_queue_depth--;
 
 		if (rdma_req->data_from_pool) {
-			/* Put the buffer back in the pool */
-			spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data_from_pool);
-			rdma_req->data_from_pool = NULL;
+			/* Put the buffer/s back in the pool */
+			for (uint32_t i = 0; i < rdma_req->req.iovcnt; i++) {
+				spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data.buffers[i]);
+				rdma_req->req.iov[i].iov_base = NULL;
+				rdma_req->data.buffers[i] = NULL;
+			}
+			rdma_req->data_from_pool = false;
 		}
 		rdma_req->req.length = 0;
+		rdma_req->req.iovcnt = 0;
 		rdma_req->req.data = NULL;
 		rdma_req->state = RDMA_REQUEST_STATE_FREE;
 		TAILQ_INSERT_TAIL(&rqpair->free_queue, rdma_req, link);
@@ -1098,6 +1159,7 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_tgt *tgt)
 	struct ibv_context	**contexts;
 	uint32_t		i;
 	int			flag;
+	uint32_t		sge_count;
 
 	rtransport = calloc(1, sizeof(*rtransport));
 	if (!rtransport) {
@@ -1115,8 +1177,21 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_tgt *tgt)
 
 	rtransport->max_queue_depth = tgt->opts.max_queue_depth;
 	rtransport->max_io_size = tgt->opts.max_io_size;
+	rtransport->io_unit_size = tgt->opts.io_unit_size;
 	rtransport->in_capsule_data_size = tgt->opts.in_capsule_data_size;
 
+	/* I/O unit size cannot be larger than max I/O size */
+	if (rtransport->io_unit_size > rtransport->max_io_size) {
+		rtransport->io_unit_size = rtransport->max_io_size;
+	}
+
+	sge_count = rtransport->max_io_size / rtransport->io_unit_size;
+	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
+		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", rtransport->io_unit_size);
+		free(rtransport);
+		return NULL;
+	}
+
 	rtransport->event_channel = rdma_create_event_channel();
 	if (rtransport->event_channel == NULL) {
 		SPDK_ERRLOG("rdma_create_event_channel() failed, %s\n", spdk_strerror(errno));
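To make the limit concrete: with the default 131072-byte max_io_size, the test config's IOUnitSize of 8192 gives sge_count = 131072 / 8192 = 16, exactly SPDK_NVMF_MAX_SGL_ENTRIES. Combined with the divisibility check in spdk_nvmf_tgt_create(), 8192 is therefore the smallest I/O unit accepted at the default max I/O size. The clamp above also means an oversized IOUnitSize is silently reduced to max_io_size, which degenerates to a single SGE per request.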
@@ -1134,7 +1209,7 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_tgt *tgt)
 
 	rtransport->data_buf_pool = spdk_mempool_create("spdk_nvmf_rdma",
 				    rtransport->max_queue_depth * 4, /* The 4 is arbitrarily chosen. Needs to be configurable. */
-				    rtransport->max_io_size + NVMF_DATA_BUFFER_ALIGNMENT,
+				    rtransport->io_unit_size + NVMF_DATA_BUFFER_ALIGNMENT,
 				    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
 				    SPDK_ENV_SOCKET_ID_ANY);
 
 	if (!rtransport->data_buf_pool) {
diff --git a/test/nvmf/nvmf.conf b/test/nvmf/nvmf.conf
index ac6a5d9e2..abe62e8dc 100644
--- a/test/nvmf/nvmf.conf
+++ b/test/nvmf/nvmf.conf
@@ -4,3 +4,4 @@
 
 [Nvmf]
   MaxQueuesPerSession 4
+  IOUnitSize 8192
diff --git a/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c b/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c
index b2dd759a9..51740fd4e 100644
--- a/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c
+++ b/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c
@@ -121,6 +121,15 @@ spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
 	return 0;
 }
 
+int
+spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+			struct iovec *iov, int iovcnt,
+			uint64_t offset_blocks, uint64_t num_blocks,
+			spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	return 0;
+}
+
 int
 spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf,
 		      uint64_t offset_blocks, uint64_t num_blocks,
@@ -129,6 +138,15 @@ spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, v
 	return 0;
 }
 
+int
+spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+		       struct iovec *iov, int iovcnt,
+		       uint64_t offset_blocks, uint64_t num_blocks,
+		       spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	return 0;
+}
+
 int
 spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
 			      uint64_t offset_blocks, uint64_t num_blocks,
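These stubs keep the unit test linking now that ctrlr_bdev.c issues vectored I/O. For intuition about what a real consumer of req->iov and req->iovcnt does, a toy gather loop in plain C (the function name is mine; no SPDK dependencies):

```c
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

/* Toy example: walk an iovec array the way a writev-style backend
 * walks req->iov/req->iovcnt, copying into one contiguous buffer. */
static size_t
iov_gather(const struct iovec *iov, int iovcnt, void *dst, size_t dst_len)
{
	size_t off = 0;

	for (int i = 0; i < iovcnt; i++) {
		if (off + iov[i].iov_len > dst_len) {
			break;	/* destination full; stop at a whole-element boundary */
		}
		memcpy((char *)dst + off, iov[i].iov_base, iov[i].iov_len);
		off += iov[i].iov_len;
	}

	return off;	/* bytes gathered */
}
```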