From 1dbf53eebfb4ef0884ebb59f5944b972b5f483f2 Mon Sep 17 00:00:00 2001 From: Piotr Pelplinski Date: Thu, 2 Mar 2017 15:12:20 +0100 Subject: [PATCH] vhost: add a library and app for userspace vhost-scsi processing This patch adds a library, application and test scripts for extending SPDK to present virtio-scsi controllers to QEMU-based VMs and process I/O submitted to devices attached to those controllers. This functionality is dependent on QEMU patches to enable vhost-scsi in userspace - those patches are currently working their way through the QEMU mailing list, but temporary patches to enable this functionality in QEMU will be made available shortly through the SPDK github repository. Signed-off-by: Jim Harris Signed-off-by: Krzysztof Jakimiak Signed-off-by: Michal Kosciowski Signed-off-by: Karol Latecki Signed-off-by: Piotr Pelplinski Signed-off-by: Daniel Verkamp Signed-off-by: Pawel Wodkowski Signed-off-by: Tomasz Zawadzki Signed-off-by: Krzysztof Jakimiak Change-Id: I138e4021f0ac4b1cd9a6e4041783cdf06e6f0efb --- app/Makefile | 2 +- app/vhost/Makefile | 62 + app/vhost/vhost.c | 164 +++ autotest.sh | 4 + etc/spdk/vhost.conf.in | 133 ++ include/spdk/vhost.h | 70 + lib/Makefile | 2 +- lib/vhost/Makefile | 46 + lib/vhost/rte_vhost/Makefile | 44 + lib/vhost/rte_vhost/fd_man.c | 299 +++++ lib/vhost/rte_vhost/fd_man.h | 67 + lib/vhost/rte_vhost/rte_virtio_net.h | 193 +++ lib/vhost/rte_vhost/socket.c | 619 +++++++++ lib/vhost/rte_vhost/vhost.c | 429 ++++++ lib/vhost/rte_vhost/vhost.h | 294 ++++ lib/vhost/rte_vhost/vhost_user.c | 1042 +++++++++++++++ lib/vhost/rte_vhost/vhost_user.h | 128 ++ lib/vhost/rte_vhost/virtio_net.c | 1186 +++++++++++++++++ lib/vhost/task.c | 162 +++ lib/vhost/task.h | 69 + lib/vhost/vhost.c | 1161 ++++++++++++++++ lib/vhost/vhost_rpc.c | 215 +++ mk/spdk.app.mk | 3 +- scripts/check_format.sh | 8 +- scripts/rpc.py | 13 +- test/vhost/ext4test/ext4connect.sh | 55 + test/vhost/ext4test/ext4start.sh | 97 ++ test/vhost/ext4test/spdk_vm_base.xml | 69 + test/vhost/ext4test/spdk_vnet_base.xml | 11 + test/vhost/ext4test/vhost.conf | 47 + test/vhost/fiotest/README | 85 ++ test/vhost/fiotest/autotest.config | 5 + test/vhost/fiotest/autotest.sh | 257 ++++ test/vhost/fiotest/common.sh | 756 +++++++++++ .../fiotest/fio_jobs/default_integrity.job | 18 + .../fiotest/fio_jobs/default_performance.job | 15 + test/vhost/fiotest/run_fio.py | 312 +++++ test/vhost/fiotest/run_vhost.sh | 49 + test/vhost/fiotest/vhost.conf | 41 + test/vhost/fiotest/vm_run.sh | 48 + test/vhost/fiotest/vm_setup.sh | 78 ++ test/vhost/fiotest/vm_shutdown.sh | 65 + test/vhost/fiotest/vm_ssh.sh | 58 + test/vhost/spdk_vhost.sh | 40 + 44 files changed, 8510 insertions(+), 11 deletions(-) create mode 100644 app/vhost/Makefile create mode 100644 app/vhost/vhost.c create mode 100644 etc/spdk/vhost.conf.in create mode 100644 include/spdk/vhost.h create mode 100644 lib/vhost/Makefile create mode 100644 lib/vhost/rte_vhost/Makefile create mode 100644 lib/vhost/rte_vhost/fd_man.c create mode 100644 lib/vhost/rte_vhost/fd_man.h create mode 100644 lib/vhost/rte_vhost/rte_virtio_net.h create mode 100644 lib/vhost/rte_vhost/socket.c create mode 100644 lib/vhost/rte_vhost/vhost.c create mode 100644 lib/vhost/rte_vhost/vhost.h create mode 100644 lib/vhost/rte_vhost/vhost_user.c create mode 100644 lib/vhost/rte_vhost/vhost_user.h create mode 100644 lib/vhost/rte_vhost/virtio_net.c create mode 100644 lib/vhost/task.c create mode 100644 lib/vhost/task.h create mode 100644 lib/vhost/vhost.c create mode 100644 lib/vhost/vhost_rpc.c 
create mode 100755 test/vhost/ext4test/ext4connect.sh create mode 100755 test/vhost/ext4test/ext4start.sh create mode 100644 test/vhost/ext4test/spdk_vm_base.xml create mode 100644 test/vhost/ext4test/spdk_vnet_base.xml create mode 100644 test/vhost/ext4test/vhost.conf create mode 100644 test/vhost/fiotest/README create mode 100644 test/vhost/fiotest/autotest.config create mode 100755 test/vhost/fiotest/autotest.sh create mode 100644 test/vhost/fiotest/common.sh create mode 100644 test/vhost/fiotest/fio_jobs/default_integrity.job create mode 100644 test/vhost/fiotest/fio_jobs/default_performance.job create mode 100755 test/vhost/fiotest/run_fio.py create mode 100755 test/vhost/fiotest/run_vhost.sh create mode 100644 test/vhost/fiotest/vhost.conf create mode 100755 test/vhost/fiotest/vm_run.sh create mode 100755 test/vhost/fiotest/vm_setup.sh create mode 100755 test/vhost/fiotest/vm_shutdown.sh create mode 100755 test/vhost/fiotest/vm_ssh.sh create mode 100755 test/vhost/spdk_vhost.sh diff --git a/app/Makefile b/app/Makefile index e5698ce41..31555099a 100644 --- a/app/Makefile +++ b/app/Makefile @@ -38,7 +38,7 @@ DIRS-y += trace DIRS-y += nvmf_tgt DIRS-y += iscsi_top ifeq ($(OS),Linux) -DIRS-y += iscsi_tgt +DIRS-y += iscsi_tgt vhost endif .PHONY: all clean $(DIRS-y) diff --git a/app/vhost/Makefile b/app/vhost/Makefile new file mode 100644 index 000000000..aff865fb5 --- /dev/null +++ b/app/vhost/Makefile @@ -0,0 +1,62 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = vhost + +CFLAGS += $(ENV_CFLAGS) + +C_SRCS := vhost.c + +SPDK_LIB_LIST = jsonrpc json rpc bdev_rpc bdev scsi net copy trace conf +SPDK_LIB_LIST += util log log_rpc event app_rpc +SPDK_LIB_LIST += vhost rte_vhost + +LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \ + $(COPY_MODULES_LINKER_ARGS) +LIBS += $(SPDK_LIB_LINKER_ARGS) +LIBS += $(ENV_LINKER_ARGS) + +all : $(APP) + +$(APP) : $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS) $(BLOCKDEV_MODULES_FILES) $(COPY_MODULES_FILES) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/app/vhost/vhost.c b/app/vhost/vhost.c new file mode 100644 index 000000000..a5cc795e3 --- /dev/null +++ b/app/vhost/vhost.c @@ -0,0 +1,164 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +#include "spdk/log.h" +#include "spdk/conf.h" +#include "spdk/event.h" + +#include "spdk/vhost.h" + + +#define SPDK_VHOST_DEFAULT_CONFIG "/usr/local/etc/spdk/vhost.conf" +#define SPDK_VHOST_DEFAULT_ENABLE_COREDUMP true +#define SPDK_VHOST_DEFAULT_MEM_SIZE 1024 + +static void +vhost_app_opts_init(struct spdk_app_opts *opts) +{ + spdk_app_opts_init(opts); + opts->name = "vhost"; + opts->config_file = SPDK_VHOST_DEFAULT_CONFIG; + opts->dpdk_mem_size = SPDK_VHOST_DEFAULT_MEM_SIZE; +} + +static void +usage(char *executable_name) +{ + struct spdk_app_opts defaults; + + vhost_app_opts_init(&defaults); + + printf("%s [options]\n", executable_name); + printf("options:\n"); + printf(" -c config config file (default: %s)\n", defaults.config_file); + printf(" -e mask tracepoint group mask for spdk trace buffers (default: 0x0)\n"); + printf(" -m mask reactor core mask (default: 0x1)\n"); + printf(" -l facility use specific syslog facility (default: %s)\n", defaults.log_facility); + printf(" -n channel number of memory channels used for DPDK\n"); + printf(" -p core master (primary) core for DPDK\n"); + printf(" -s size memory size in MB for DPDK (default: %dMB)\n", defaults.dpdk_mem_size); + printf(" -S dir directory where to create vhost sockets (default: pwd)\n"); + spdk_tracelog_usage(stdout, "-t"); + printf(" -h show this usage\n"); + printf(" -d disable coredump file enabling\n"); + printf(" -q disable notice level logging to stderr\n"); +} + +int +main(int argc, char *argv[]) +{ + struct spdk_app_opts opts = {}; + char ch; + int rc; + const char *socket_path = NULL; + + vhost_app_opts_init(&opts); + + while ((ch = getopt(argc, argv, "c:de:l:m:p:qs:S:t:h")) != -1) { + switch (ch) { + case 'c': + opts.config_file = optarg; + break; + case 'd': + opts.enable_coredump = false; + break; + case 'e': + opts.tpoint_group_mask = optarg; + break; + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + case 'l': + opts.log_facility = optarg; + break; + case 'm': + opts.reactor_mask = optarg; + break; + case 'p': + opts.dpdk_master_core = strtoul(optarg, NULL, 10); + break; + case 'q': + spdk_g_notice_stderr_flag = 0; + break; + case 's': + opts.dpdk_mem_size = strtoul(optarg, NULL, 10); + break; + case 'S': + socket_path = optarg; + break; + case 't': + rc = spdk_log_set_trace_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -t flag.\n", + argv[0]); + usage(argv[0]); + exit(EXIT_FAILURE); +#endif + break; + default: + fprintf(stderr, "%s Unknown option '-%c'.\n", argv[0], ch); + usage(argv[0]); + exit(EXIT_FAILURE); + } + } + + if (spdk_g_notice_stderr_flag == 1 && + isatty(STDERR_FILENO) && + !strncmp(ttyname(STDERR_FILENO), "/dev/tty", strlen("/dev/tty"))) { + printf("Warning: printing stderr to console terminal without -q option specified.\n"); + printf("Suggest using -q to disable logging to stderr and monitor syslog, or\n"); + printf("redirect stderr to a file.\n"); + printf("(Delaying for 10 seconds...)\n"); + sleep(10); + } + + opts.shutdown_cb = spdk_vhost_shutdown_cb; + spdk_app_init(&opts); + + /* Blocks until the application is exiting */ + rc = spdk_app_start(spdk_vhost_startup, (void *)socket_path, NULL); + + spdk_app_fini(); + + return rc; +} diff --git a/autotest.sh b/autotest.sh index 1e7294042..2483ba20a 100755 --- a/autotest.sh +++ b/autotest.sh @@ -137,6 +137,10 @@ timing_exit host timing_exit nvmf 
+timing_enter vhost +run_test ./test/vhost/spdk_vhost.sh --integrity +timing_exit vhost + timing_enter cleanup rbd_cleanup ./scripts/setup.sh reset diff --git a/etc/spdk/vhost.conf.in b/etc/spdk/vhost.conf.in new file mode 100644 index 000000000..fb258e3ad --- /dev/null +++ b/etc/spdk/vhost.conf.in @@ -0,0 +1,133 @@ +# SPDK vhost configuration file +# +# Please write all parameters using ASCII. +# The parameter must be quoted if it includes whitespace. + +# Configuration syntax: +# Leading whitespace is ignored. +# Lines starting with '#' are comments. +# Lines ending with '\' are concatenated with the next line. +# Bracketed ([]) names define sections + +[Global] + # Instance ID for multi-process support + # Default: 0 + #InstanceID 0 + + # Users can restrict work items to only run on certain cores by + # specifying a ReactorMask. Default is to allow work items to run + # on core 0. + #ReactorMask 0xFFFF + + # Tracepoint group mask for spdk trace buffers + # Default: 0x0 (all tracepoint groups disabled) + # Set to 0xFFFFFFFFFFFFFFFF to enable all tracepoint groups. + #TpointGroupMask 0x0 + + # syslog facility + LogFacility "local7" + +[Rpc] + # Defines whether SPDK vhost will enable configuration via RPC. + # Default is disabled. Note that the RPC interface is not + # authenticated, so users should be careful about enabling + # RPC in non-trusted environments. + Enable No + # Listen address for the RPC service. + # May be an IP address or an absolute path to a Unix socket. + Listen 127.0.0.1 + +# Users may not want to use offload even if it is available. +# Users may use the whitelist to initialize specified devices, IDS +# uses BUS:DEVICE.FUNCTION to identify each Ioat channel. +[Ioat] + Disable Yes + #Whitelist 00:04.0 + #Whitelist 00:04.1 + +# Users must change this section to match the /dev/sdX devices to be +# exported as vhost scsi drives. The devices are accessed using Linux AIO. +[AIO] + #AIO /dev/sdb + #AIO /dev/sdc + +# Users may change this section to create a different number or size of +# malloc LUNs. +# If the system has a hardware DMA engine, it will use an IOAT +# (i.e. Crystal Beach DMA) channel to do the copy instead of memcpy. +# Of course, users can disable offload even if it is available. +[Malloc] + # Number of Malloc targets + NumberOfLuns 3 + # Malloc targets are 128M + LunSizeInMB 128 + # Block size. Default is 512 bytes. + BlockSize 4096 + +# NVMe configuration options +[Nvme] + # NVMe Device Whitelist + # Users may specify which NVMe devices to claim by their PCI + # domain, bus, device, and function. The format is dddd:bb:dd.f, which is + # the same format displayed by lspci or in /sys/bus/pci/devices. The second + # argument is a "name" for the device that can be anything. The name + # is referenced later in the Subsystem section. + # + # Alternatively, the user can specify ClaimAllDevices. All + # NVMe devices will be claimed and named Nvme0, Nvme1, etc. + #BDF 0000:81:00.0 Nvme0 + #BDF 0000:01:00.0 Nvme1 + ClaimAllDevices + + # The number of attempts per I/O when an I/O fails. Do not include + # this key to get the default behavior. + NvmeRetryCount 4 + # The maximum number of NVMe controllers to claim. Do not include this key to + # claim all of them. + NumControllers 2 + # Registers the application to receive timeout callback and to reset the controller. + ResetControllerOnTimeout Yes + # Timeout value. + NvmeTimeoutValue 30 + # Set how often the admin queue is polled for asynchronous events. + # Units in microseconds.
+ AdminPollRate 100000 + +# The Split virtual block device slices block devices into multiple smaller bdevs. +[Split] + # Syntax: + # Split <bdev> <count> [<size_in_megabytes>] + # + # Split Nvme1n1 into two equally-sized portions, Nvme1n1p0 and Nvme1n1p1 + #Split Nvme1n1 2 + + # Split Malloc2 into eight 1-megabyte portions, Malloc2p0 ... Malloc2p7, + # leaving the rest of the device inaccessible + #Split Malloc2 8 1 + +# Vhost scsi controller configuration +# Users should change the VhostScsi section(s) below to match the desired +# vhost configuration. +# Name is the minimum required parameter +[VhostScsi0] + # Define name for controller + Name vhost.0 + # Assign devices from backend + # Use the first malloc device + Dev0 Malloc0 + # Use the first AIO device + #Dev1 AIO0 + # Use the first Nvme device + #Dev2 Nvme0n1 + # Use the third partition from the second Nvme device + #Dev3 Nvme1n1p2 + + # Start the poller for this vhost controller on one of the cores in + # this cpumask. By default, if not specified, any core in the + # SPDK process will be used. + #Cpumask 0x1 + +#[VhostScsi1] +# Name vhost.1 +# Dev0 AIO1 +# Cpumask 0x1 diff --git a/include/spdk/vhost.h b/include/spdk/vhost.h new file mode 100644 index 000000000..75e362f57 --- /dev/null +++ b/include/spdk/vhost.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * SPDK vhost + */ + +#ifndef SPDK_VHOST_H +#define SPDK_VHOST_H + +#include "spdk/event.h" + +#define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8 + +/** + * \param event event object. event arg1 is optional path to vhost socket. + */ +void spdk_vhost_startup(void *arg1, void *arg2); +void spdk_vhost_shutdown_cb(void); + +/* Forward declaration */ +struct spdk_vhost_scsi_ctrlr; + +/** + * Get handle to next controller. + * \param prev Previous controller or NULL to get first one. + * \return handle to next controller or NULL if prev was the last one.
+ */ +struct spdk_vhost_scsi_ctrlr *spdk_vhost_scsi_ctrlr_next(struct spdk_vhost_scsi_ctrlr *prev); + +const char *spdk_vhost_scsi_ctrlr_get_name(struct spdk_vhost_scsi_ctrlr *ctrl); +uint64_t spdk_vhost_scsi_ctrlr_get_cpumask(struct spdk_vhost_scsi_ctrlr *ctrl); +int spdk_vhost_scsi_ctrlr_construct(const char *name, uint64_t cpumask); +int spdk_vhost_parse_core_mask(const char *mask, uint64_t *cpumask); +struct spdk_scsi_dev *spdk_vhost_scsi_ctrlr_get_dev(struct spdk_vhost_scsi_ctrlr *ctrl, + uint8_t num); +int spdk_vhost_scsi_ctrlr_add_dev(const char *name, unsigned scsi_dev_num, const char *lun_name); + +#endif /* SPDK_VHOST_H */ diff --git a/lib/Makefile b/lib/Makefile index 3f86753b9..b8f54e0d9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -37,7 +37,7 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk DIRS-y += bdev conf copy cunit event json jsonrpc \ log env_dpdk net rpc trace util nvme nvmf scsi ioat ifeq ($(OS),Linux) -DIRS-y += iscsi +DIRS-y += iscsi vhost endif diff --git a/lib/vhost/Makefile b/lib/vhost/Makefile new file mode 100644 index 000000000..bbf38fc3d --- /dev/null +++ b/lib/vhost/Makefile @@ -0,0 +1,46 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -Irte_vhost +CFLAGS += $(ENV_CFLAGS) + +C_SRCS = task.c vhost.c vhost_rpc.c + +LIBNAME = vhost + +DIRS-y += rte_vhost + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/vhost/rte_vhost/Makefile b/lib/vhost/rte_vhost/Makefile new file mode 100644 index 000000000..336425818 --- /dev/null +++ b/lib/vhost/rte_vhost/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(ENV_CFLAGS) + +# These are the DPDK vhost files copied (for now) into SPDK +C_SRCS += fd_man.c socket.c vhost_user.c virtio_net.c vhost.c + +LIBNAME = rte_vhost + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/vhost/rte_vhost/fd_man.c b/lib/vhost/rte_vhost/fd_man.c new file mode 100644 index 000000000..2d3eeb7d7 --- /dev/null +++ b/lib/vhost/rte_vhost/fd_man.c @@ -0,0 +1,299 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fd_man.h" + +/** + * Returns the index in the fdset for a given fd. 
+ * If fd is -1, it means to search for a free entry. + * @return + * index for the fd, or -1 if fd isn't in the fdset. + */ +static int +fdset_find_fd(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++) + ; + + return i == MAX_FDS ? -1 : i; +} + +static int +fdset_find_free_slot(struct fdset *pfdset) +{ + return fdset_find_fd(pfdset, -1); +} + +static int +fdset_add_fd(struct fdset *pfdset, int idx, int fd, + fd_cb rcb, fd_cb wcb, void *dat) +{ + struct fdentry *pfdentry; + + if (pfdset == NULL || idx >= MAX_FDS || fd >= FD_SETSIZE) + return -1; + + pfdentry = &pfdset->fd[idx]; + pfdentry->fd = fd; + pfdentry->rcb = rcb; + pfdentry->wcb = wcb; + pfdentry->dat = dat; + + return 0; +} + +/** + * Fill the read/write fd_set with the fds in the fdset. + * @return + * the maximum fds filled in the read/write fd_set. + */ +static int +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset) +{ + struct fdentry *pfdentry; + int i, maxfds = -1; + int num = MAX_FDS; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < num; i++) { + pfdentry = &pfdset->fd[i]; + if (pfdentry->fd != -1) { + int added = 0; + if (pfdentry->rcb && rfset) { + FD_SET(pfdentry->fd, rfset); + added = 1; + } + if (pfdentry->wcb && wfset) { + FD_SET(pfdentry->fd, wfset); + added = 1; + } + if (added) + maxfds = pfdentry->fd < maxfds ? + maxfds : pfdentry->fd; + } + } + return maxfds; +} + +void +fdset_init(struct fdset *pfdset) +{ + int i; + + if (pfdset == NULL) + return; + + for (i = 0; i < MAX_FDS; i++) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].dat = NULL; + } + pfdset->num = 0; +} + +/** + * Register the fd in the fdset with read/write handler and context. + */ +int +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -1; + + pthread_mutex_lock(&pfdset->fd_mutex); + + /* Find a free slot in the list. */ + i = fdset_find_free_slot(pfdset); + if (i == -1 || fdset_add_fd(pfdset, i, fd, rcb, wcb, dat) < 0) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -2; + } + + pfdset->num++; + + pthread_mutex_unlock(&pfdset->fd_mutex); + + return 0; +} + +/** + * Unregister the fd from the fdset. + * Returns context of a given fd or NULL. + */ +void * +fdset_del(struct fdset *pfdset, int fd) +{ + int i; + void *dat = NULL; + + if (pfdset == NULL || fd == -1) + return NULL; + + do { + pthread_mutex_lock(&pfdset->fd_mutex); + + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy == 0) { + /* busy indicates r/wcb is executing! */ + dat = pfdset->fd[i].dat; + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + pfdset->num--; + i = -1; + } + pthread_mutex_unlock(&pfdset->fd_mutex); + } while (i != -1); + + return dat; +} + +/** + * Unregister the fd at the specified slot from the fdset. + */ +static void +fdset_del_slot(struct fdset *pfdset, int index) +{ + if (pfdset == NULL || index < 0 || index >= MAX_FDS) + return; + + pthread_mutex_lock(&pfdset->fd_mutex); + + pfdset->fd[index].fd = -1; + pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL; + pfdset->fd[index].dat = NULL; + pfdset->num--; + + pthread_mutex_unlock(&pfdset->fd_mutex); +} + +/** + * This functions runs in infinite blocking loop until there is no fd in + * pfdset. It calls corresponding r/w handler if there is event on the fd. 
+ * + * Before the callback is called, we set the flag to busy status; If other + * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it + * will wait until the flag is reset to zero(which indicates the callback is + * finished), then it could free the context after fdset_del. + */ +void +fdset_event_dispatch(struct fdset *pfdset) +{ + fd_set rfds, wfds; + int i, maxfds; + struct fdentry *pfdentry; + int num = MAX_FDS; + fd_cb rcb, wcb; + void *dat; + int fd; + int remove1, remove2; + int ret; + + if (pfdset == NULL) + return; + + while (1) { + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + FD_ZERO(&rfds); + FD_ZERO(&wfds); + pthread_mutex_lock(&pfdset->fd_mutex); + + maxfds = fdset_fill(&rfds, &wfds, pfdset); + + pthread_mutex_unlock(&pfdset->fd_mutex); + + /* + * When select is blocked, other threads might unregister + * listenfds from and register new listenfds into fdset. + * When select returns, the entries for listenfds in the fdset + * might have been updated. It is ok if there is unwanted call + * for new listenfds. + */ + ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv); + if (ret <= 0) + continue; + + for (i = 0; i < num; i++) { + remove1 = remove2 = 0; + pthread_mutex_lock(&pfdset->fd_mutex); + pfdentry = &pfdset->fd[i]; + fd = pfdentry->fd; + rcb = pfdentry->rcb; + wcb = pfdentry->wcb; + dat = pfdentry->dat; + pfdentry->busy = 1; + pthread_mutex_unlock(&pfdset->fd_mutex); + if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb) + rcb(fd, dat, &remove1); + if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb) + wcb(fd, dat, &remove2); + pfdentry->busy = 0; + /* + * fdset_del needs to check busy flag. + * We don't allow fdset_del to be called in callback + * directly. + */ + /* + * When we are to clean up the fd from fdset, + * because the fd is closed in the cb, + * the old fd val could be reused by when creates new + * listen fd in another thread, we couldn't call + * fd_set_del. + */ + if (remove1 || remove2) + fdset_del_slot(pfdset, i); + } + } +} diff --git a/lib/vhost/rte_vhost/fd_man.h b/lib/vhost/rte_vhost/fd_man.h new file mode 100644 index 000000000..bd66ed1c5 --- /dev/null +++ b/lib/vhost/rte_vhost/fd_man.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FD_MAN_H_ +#define _FD_MAN_H_ +#include +#include + +#define MAX_FDS 1024 + +typedef void (*fd_cb)(int fd, void *dat, int *remove); + +struct fdentry { + int fd; /* -1 indicates this entry is empty */ + fd_cb rcb; /* callback when this fd is readable. */ + fd_cb wcb; /* callback when this fd is writeable.*/ + void *dat; /* fd context */ + int busy; /* whether this entry is being used in cb. */ +}; + +struct fdset { + struct fdentry fd[MAX_FDS]; + pthread_mutex_t fd_mutex; + int num; /* current fd number of this fdset */ +}; + + +void fdset_init(struct fdset *pfdset); + +int fdset_add(struct fdset *pfdset, int fd, + fd_cb rcb, fd_cb wcb, void *dat); + +void *fdset_del(struct fdset *pfdset, int fd); + +void fdset_event_dispatch(struct fdset *pfdset); + +#endif diff --git a/lib/vhost/rte_vhost/rte_virtio_net.h b/lib/vhost/rte_vhost/rte_virtio_net.h new file mode 100644 index 000000000..926039c5a --- /dev/null +++ b/lib/vhost/rte_vhost/rte_virtio_net.h @@ -0,0 +1,193 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VIRTIO_NET_H_ +#define _VIRTIO_NET_H_ + +/** + * @file + * Interface to vhost net + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define RTE_VHOST_USER_CLIENT (1ULL << 0) +#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) +#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) + +/* Enum for virtqueue management. */ +enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; + +/** + * Device and vring operations. + */ +struct virtio_net_device_ops { + int (*new_device)(int vid); /**< Add device. */ + void (*destroy_device)(int vid); /**< Remove device. */ + + int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ + + void *reserved[5]; /**< Reserved for future extension */ +}; + +/** + * Disable features in feature_mask. Returns 0 on success. + */ +int rte_vhost_feature_disable(uint64_t feature_mask); + +/** + * Enable features in feature_mask. Returns 0 on success. + */ +int rte_vhost_feature_enable(uint64_t feature_mask); + +/* Returns currently supported vhost features */ +uint64_t rte_vhost_feature_get(void); + +int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); + +/** + * Register vhost driver. path could be different for multiple + * instance support. + */ +int rte_vhost_driver_register(const char *path, uint64_t flags); + +/* Unregister vhost driver. This is only meaningful to vhost user. */ +int rte_vhost_driver_unregister(const char *path); + +/* Register callbacks. */ +int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const); +/* Start vhost driver session blocking loop. */ +int rte_vhost_driver_session_start(void); + +/** + * Get the numa node from which the virtio net device's memory + * is allocated. + * + * @param vid + * virtio-net device ID + * + * @return + * The numa node, -1 on failure + */ +int rte_vhost_get_numa_node(int vid); + +/** + * Get the number of queues the device supports. + * + * @param vid + * virtio-net device ID + * + * @return + * The number of queues, 0 on failure + */ +uint32_t rte_vhost_get_queue_num(int vid); + +/** + * Get the virtio net device's ifname, which is the vhost-user socket + * file path. + * + * @param vid + * virtio-net device ID + * @param buf + * The buffer to stored the queried ifname + * @param len + * The length of buf + * + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_ifname(int vid, char *buf, size_t len); + +/** + * Get how many avail entries are left in the queue + * + * @param vid + * virtio-net device ID + * @param queue_id + * virtio queue index + * + * @return + * num of avail entires left + */ +uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); + +/** + * This function adds buffers to the virtio devices RX virtqueue. Buffers can + * be received from the physical port or from another virtual device. A packet + * count is returned to indicate the number of packets that were succesfully + * added to the RX queue. 
+ * @param vid + * virtio-net device ID + * @param queue_id + * virtio queue index in mq case + * @param pkts + * array to contain packets to be enqueued + * @param count + * packets num to be enqueued + * @return + * num of packets enqueued + */ +uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id, + struct rte_mbuf **pkts, uint16_t count); + +/** + * This function gets guest buffers from the virtio device TX virtqueue, + * construct host mbufs, copies guest buffer content to host mbufs and + * store them in pkts to be processed. + * @param vid + * virtio-net device + * @param queue_id + * virtio queue index in mq case + * @param mbuf_pool + * mbuf_pool where host mbuf is allocated. + * @param pkts + * array to contain packets to be dequeued + * @param count + * packets num to be dequeued + * @return + * num of packets dequeued + */ +uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); + +#endif /* _VIRTIO_NET_H_ */ diff --git a/lib/vhost/rte_vhost/socket.c b/lib/vhost/rte_vhost/socket.c new file mode 100644 index 000000000..9276ce58c --- /dev/null +++ b/lib/vhost/rte_vhost/socket.c @@ -0,0 +1,619 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "fd_man.h" +#include "vhost.h" +#include "vhost_user.h" + +/* + * Every time rte_vhost_driver_register() is invoked, an associated + * vhost_user_socket struct will be created. 
+ */ +struct vhost_user_socket { + char *path; + int listenfd; + int connfd; + bool is_server; + bool reconnect; + bool dequeue_zero_copy; +}; + +struct vhost_user_connection { + struct vhost_user_socket *vsocket; + int vid; +}; + +#define MAX_VHOST_SOCKET 1024 +struct vhost_user { + struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; + struct fdset fdset; + int vsocket_cnt; + pthread_mutex_t mutex; +}; + +#define MAX_VIRTIO_BACKLOG 128 + +static void vhost_user_server_new_connection(int fd, void *data, int *remove); +static void vhost_user_read_cb(int fd, void *dat, int *remove); +static int vhost_user_create_client(struct vhost_user_socket *vsocket); + +static struct vhost_user vhost_user = { + .fdset = { + .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, + .fd_mutex = PTHREAD_MUTEX_INITIALIZER, + .num = 0 + }, + .vsocket_cnt = 0, + .mutex = PTHREAD_MUTEX_INITIALIZER, +}; + +/* return bytes# of read on success or negative val on failure. */ +int +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + ret = recvmsg(sockfd, &msgh, 0); + if (ret <= 0) { + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); + return ret; + } + + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); + return -1; + } + + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + memcpy(fds, CMSG_DATA(cmsg), fdsize); + break; + } + } + + return ret; +} + +int +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + + if (fds && fd_num > 0) { + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "null cmsg\n"); + return -1; + } + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } else { + msgh.msg_control = NULL; + msgh.msg_controllen = 0; + } + + do { + ret = sendmsg(sockfd, &msgh, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); + return ret; + } + + return ret; +} + +static void +vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) +{ + int vid; + size_t size; + struct vhost_user_connection *conn; + int ret; + + conn = malloc(sizeof(*conn)); + if (conn == NULL) { + close(fd); + return; + } + + vid = vhost_new_device(); + if (vid == -1) { + close(fd); + free(conn); + return; + } + + size = strnlen(vsocket->path, PATH_MAX); + vhost_set_ifname(vid, vsocket->path, size); + + if (vsocket->dequeue_zero_copy) + vhost_enable_dequeue_zero_copy(vid); + + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); + + vsocket->connfd = fd; + conn->vsocket = vsocket; + conn->vid = vid; + ret = fdset_add(&vhost_user.fdset, 
fd, vhost_user_read_cb, + NULL, conn); + if (ret < 0) { + vsocket->connfd = -1; + free(conn); + close(fd); + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add fd %d into vhost server fdset\n", + fd); + } +} + +/* call back when there is new vhost-user connection from client */ +static void +vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) +{ + struct vhost_user_socket *vsocket = dat; + + fd = accept(fd, NULL, NULL); + if (fd < 0) + return; + + RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); + vhost_user_add_connection(fd, vsocket); +} + +static void +vhost_user_read_cb(int connfd, void *dat, int *remove) +{ + struct vhost_user_connection *conn = dat; + struct vhost_user_socket *vsocket = conn->vsocket; + int ret; + + ret = vhost_user_msg_handler(conn->vid, connfd); + if (ret < 0) { + vsocket->connfd = -1; + close(connfd); + *remove = 1; + vhost_destroy_device(conn->vid); + free(conn); + + if (vsocket->reconnect) + vhost_user_create_client(vsocket); + } +} + +static int +create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server) +{ + int fd; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", + is_server ? "server" : "client", fd); + + if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "vhost-user: can't set nonblocking mode for socket, fd: " + "%d (%s)\n", fd, strerror(errno)); + close(fd); + return -1; + } + + memset(un, 0, sizeof(*un)); + un->sun_family = AF_UNIX; + strncpy(un->sun_path, path, sizeof(un->sun_path)); + un->sun_path[sizeof(un->sun_path) - 1] = '\0'; + + return fd; +} + +static int +vhost_user_create_server(struct vhost_user_socket *vsocket) +{ + int fd; + int ret; + struct sockaddr_un un; + const char *path = vsocket->path; + + fd = create_unix_socket(path, &un, vsocket->is_server); + if (fd < 0) + return -1; + + ret = bind(fd, (struct sockaddr *)&un, sizeof(un)); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to bind to %s: %s; remove it and try again\n", + path, strerror(errno)); + goto err; + } + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); + + ret = listen(fd, MAX_VIRTIO_BACKLOG); + if (ret < 0) + goto err; + + vsocket->listenfd = fd; + ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, + NULL, vsocket); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add listen fd %d to vhost server fdset\n", + fd); + goto err; + } + + return 0; + +err: + close(fd); + return -1; +} + +struct vhost_user_reconnect { + struct sockaddr_un un; + int fd; + struct vhost_user_socket *vsocket; + + TAILQ_ENTRY(vhost_user_reconnect) next; +}; + +TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); +struct vhost_user_reconnect_list { + struct vhost_user_reconnect_tailq_list head; + pthread_mutex_t mutex; +}; + +static struct vhost_user_reconnect_list reconn_list; +static pthread_t reconn_tid; + +static int +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) +{ + int ret, flags; + + ret = connect(fd, un, sz); + if (ret < 0 && errno != EISCONN) + return -1; + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't get flags for connfd %d\n", fd); + return -2; + } + if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't disable nonblocking on fd %d\n", fd); + return -2; + } + return 0; +} + +static void * +vhost_user_client_reconnect(void *arg 
__rte_unused) +{ + int ret; + struct vhost_user_reconnect *reconn, *next; + + while (1) { + pthread_mutex_lock(&reconn_list.mutex); + + /* + * An equal implementation of TAILQ_FOREACH_SAFE, + * which does not exist on all platforms. + */ + for (reconn = TAILQ_FIRST(&reconn_list.head); + reconn != NULL; reconn = next) { + next = TAILQ_NEXT(reconn, next); + + ret = vhost_user_connect_nonblock(reconn->fd, + (struct sockaddr *)&reconn->un, + sizeof(reconn->un)); + if (ret == -2) { + close(reconn->fd); + RTE_LOG(ERR, VHOST_CONFIG, + "reconnection for fd %d failed\n", + reconn->fd); + goto remove_fd; + } + if (ret == -1) + continue; + + RTE_LOG(INFO, VHOST_CONFIG, + "%s: connected\n", reconn->vsocket->path); + vhost_user_add_connection(reconn->fd, reconn->vsocket); +remove_fd: + TAILQ_REMOVE(&reconn_list.head, reconn, next); + free(reconn); + } + + pthread_mutex_unlock(&reconn_list.mutex); + sleep(1); + } + + return NULL; +} + +static int +vhost_user_reconnect_init(void) +{ + int ret; + + pthread_mutex_init(&reconn_list.mutex, NULL); + TAILQ_INIT(&reconn_list.head); + + ret = pthread_create(&reconn_tid, NULL, + vhost_user_client_reconnect, NULL); + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); + + return ret; +} + +static int +vhost_user_create_client(struct vhost_user_socket *vsocket) +{ + int fd; + int ret; + struct sockaddr_un un; + const char *path = vsocket->path; + struct vhost_user_reconnect *reconn; + + fd = create_unix_socket(path, &un, vsocket->is_server); + if (fd < 0) + return -1; + + ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un, + sizeof(un)); + if (ret == 0) { + vhost_user_add_connection(fd, vsocket); + return 0; + } + + RTE_LOG(ERR, VHOST_CONFIG, + "failed to connect to %s: %s\n", + path, strerror(errno)); + + if (ret == -2 || !vsocket->reconnect) { + close(fd); + return -1; + } + + RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path); + reconn = malloc(sizeof(*reconn)); + if (reconn == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for reconnect\n"); + close(fd); + return -1; + } + reconn->un = un; + reconn->fd = fd; + reconn->vsocket = vsocket; + pthread_mutex_lock(&reconn_list.mutex); + TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); + pthread_mutex_unlock(&reconn_list.mutex); + + return 0; +} + +/* + * Register a new vhost-user socket; here we could act as server + * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag + * is set. 
+ */ +int +rte_vhost_driver_register(const char *path, uint64_t flags) +{ + int ret = -1; + struct vhost_user_socket *vsocket; + + if (!path) + return -1; + + pthread_mutex_lock(&vhost_user.mutex); + + if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: the number of vhost sockets reaches maximum\n"); + goto out; + } + + vsocket = malloc(sizeof(struct vhost_user_socket)); + if (!vsocket) + goto out; + memset(vsocket, 0, sizeof(struct vhost_user_socket)); + vsocket->path = strdup(path); + vsocket->connfd = -1; + vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; + + if ((flags & RTE_VHOST_USER_CLIENT) != 0) { + vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); + if (vsocket->reconnect && reconn_tid == 0) { + if (vhost_user_reconnect_init() < 0) { + free(vsocket->path); + free(vsocket); + goto out; + } + } + ret = vhost_user_create_client(vsocket); + } else { + vsocket->is_server = true; + ret = vhost_user_create_server(vsocket); + } + if (ret < 0) { + free(vsocket->path); + free(vsocket); + goto out; + } + + vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; + +out: + pthread_mutex_unlock(&vhost_user.mutex); + + return ret; +} + +static bool +vhost_user_remove_reconnect(struct vhost_user_socket *vsocket) +{ + int found = false; + struct vhost_user_reconnect *reconn, *next; + + pthread_mutex_lock(&reconn_list.mutex); + + for (reconn = TAILQ_FIRST(&reconn_list.head); + reconn != NULL; reconn = next) { + next = TAILQ_NEXT(reconn, next); + + if (reconn->vsocket == vsocket) { + TAILQ_REMOVE(&reconn_list.head, reconn, next); + close(reconn->fd); + free(reconn); + found = true; + break; + } + } + pthread_mutex_unlock(&reconn_list.mutex); + return found; +} + +/** + * Unregister the specified vhost socket + */ +int +rte_vhost_driver_unregister(const char *path) +{ + int i; + int count; + struct vhost_user_connection *conn; + + pthread_mutex_lock(&vhost_user.mutex); + + for (i = 0; i < vhost_user.vsocket_cnt; i++) { + struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; + + if (!strcmp(vsocket->path, path)) { + if (vsocket->is_server) { + fdset_del(&vhost_user.fdset, vsocket->listenfd); + close(vsocket->listenfd); + unlink(path); + } else if (vsocket->reconnect) { + vhost_user_remove_reconnect(vsocket); + } + + conn = fdset_del(&vhost_user.fdset, vsocket->connfd); + if (conn) { + RTE_LOG(INFO, VHOST_CONFIG, + "free connfd = %d for device '%s'\n", + vsocket->connfd, path); + close(vsocket->connfd); + vhost_destroy_device(conn->vid); + free(conn); + } + + free(vsocket->path); + free(vsocket); + + count = --vhost_user.vsocket_cnt; + vhost_user.vsockets[i] = vhost_user.vsockets[count]; + vhost_user.vsockets[count] = NULL; + pthread_mutex_unlock(&vhost_user.mutex); + + return 0; + } + } + pthread_mutex_unlock(&vhost_user.mutex); + + return -1; +} + +int +rte_vhost_driver_session_start(void) +{ + fdset_event_dispatch(&vhost_user.fdset); + return 0; +} diff --git a/lib/vhost/rte_vhost/vhost.c b/lib/vhost/rte_vhost/vhost.c new file mode 100644 index 000000000..5270410b6 --- /dev/null +++ b/lib/vhost/rte_vhost/vhost.c @@ -0,0 +1,429 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "vhost.h" + +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +/* Features supported by this lib. */ +#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_CTRL_RX) | \ + (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + (VHOST_SUPPORTS_MQ) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VHOST_F_LOG_ALL) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6)) + +uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES; + +struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +/* device ops to add/remove device to/from data core. */ +struct virtio_net_device_ops const *notify_ops; + +struct virtio_net * +get_device(int vid) +{ + struct virtio_net *dev = vhost_devices[vid]; + + if (unlikely(!dev)) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) device not found.\n", vid); + } + + return dev; +} + +static void +cleanup_vq(struct vhost_virtqueue *vq, int destroy) +{ + if ((vq->callfd >= 0) && (destroy != 0)) + close(vq->callfd); + if (vq->kickfd >= 0) + close(vq->kickfd); +} + +/* + * Unmap any memory, close any file descriptors and + * free any memory owned by a device. + */ +void +cleanup_device(struct virtio_net *dev, int destroy) +{ + uint32_t i; + + vhost_backend_cleanup(dev); + + for (i = 0; i < dev->virt_qp_nb; i++) { + cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy); + cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy); + } +} + +/* + * Release virtqueues and device memory. 
+ */ +static void +free_device(struct virtio_net *dev) +{ + uint32_t i; + struct vhost_virtqueue *rxq, *txq; + + for (i = 0; i < dev->virt_qp_nb; i++) { + rxq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; + txq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; + + rte_free(rxq->shadow_used_ring); + rte_free(txq->shadow_used_ring); + + /* rxq and txq are allocated together as queue-pair */ + rte_free(rxq); + } + + rte_free(dev); +} + +static void +init_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +{ + memset(vq, 0, sizeof(struct vhost_virtqueue)); + + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + + /* Backends are set to -1 indicating an inactive device. */ + vq->backend = -1; + + /* always set the default vq pair to enabled */ + if (qp_idx == 0) + vq->enabled = 1; + + TAILQ_INIT(&vq->zmbuf_list); +} + +static void +init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + uint32_t base_idx = qp_idx * VIRTIO_QNUM; + + init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); + init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); +} + +static void +reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +{ + int callfd; + + callfd = vq->callfd; + init_vring_queue(vq, qp_idx); + vq->callfd = callfd; +} + +static void +reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + uint32_t base_idx = qp_idx * VIRTIO_QNUM; + + reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); + reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); +} + +int +alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + struct vhost_virtqueue *virtqueue = NULL; + uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ; + uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ; + + virtqueue = rte_malloc(NULL, + sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0); + if (virtqueue == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for virt qp:%d.\n", qp_idx); + return -1; + } + + dev->virtqueue[virt_rx_q_idx] = virtqueue; + dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ; + + init_vring_queue_pair(dev, qp_idx); + + dev->virt_qp_nb += 1; + + return 0; +} + +/* + * Reset some variables in device structure, while keeping few + * others untouched, such as vid, ifname, virt_qp_nb: they + * should be same unless the device is removed. + */ +void +reset_device(struct virtio_net *dev) +{ + uint32_t i; + + dev->features = 0; + dev->protocol_features = 0; + dev->flags = 0; + + for (i = 0; i < dev->virt_qp_nb; i++) + reset_vring_queue_pair(dev, i); +} + +/* + * Invoked when there is a new vhost-user connection established (when + * there is a new virtio device being attached). + */ +int +vhost_new_device(void) +{ + struct virtio_net *dev; + int i; + + dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); + if (dev == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for new dev.\n"); + return -1; + } + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + if (vhost_devices[i] == NULL) + break; + } + if (i == MAX_VHOST_DEVICE) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find a free slot for new device.\n"); + return -1; + } + + vhost_devices[i] = dev; + dev->vid = i; + + return i; +} + +/* + * Invoked when there is the vhost-user connection is broken (when + * the virtio device is being detached). 
+ */ +void +vhost_destroy_device(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(vid); + } + + cleanup_device(dev, 1); + free_device(dev); + + vhost_devices[vid] = NULL; +} + +void +vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) +{ + struct virtio_net *dev; + unsigned int len; + + dev = get_device(vid); + if (dev == NULL) + return; + + len = if_len > sizeof(dev->ifname) ? + sizeof(dev->ifname) : if_len; + + strncpy(dev->ifname, if_name, len); + dev->ifname[sizeof(dev->ifname) - 1] = '\0'; +} + +void +vhost_enable_dequeue_zero_copy(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + dev->dequeue_zero_copy = 1; +} + +int +rte_vhost_get_numa_node(int vid) +{ +#ifdef RTE_LIBRTE_VHOST_NUMA + struct virtio_net *dev = get_device(vid); + int numa_node; + int ret; + + if (dev == NULL) + return -1; + + ret = get_mempolicy(&numa_node, NULL, 0, dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to query numa node: %d\n", vid, ret); + return -1; + } + + return numa_node; +#else + RTE_SET_USED(vid); + return -1; +#endif +} + +uint32_t +rte_vhost_get_queue_num(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return 0; + + return dev->virt_qp_nb; +} + +int +rte_vhost_get_ifname(int vid, char *buf, size_t len) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + len = RTE_MIN(len, sizeof(dev->ifname)); + + strncpy(buf, dev->ifname, len); + buf[len - 1] = '\0'; + + return 0; +} + +uint16_t +rte_vhost_avail_entries(int vid, uint16_t queue_id) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return 0; + + vq = dev->virtqueue[queue_id]; + if (!vq->enabled) + return 0; + + return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; +} + +int +rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + if (enable) { + RTE_LOG(ERR, VHOST_CONFIG, + "guest notification isn't supported.\n"); + return -1; + } + + dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; + return 0; +} + +uint64_t rte_vhost_feature_get(void) +{ + return VHOST_FEATURES; +} + +int rte_vhost_feature_disable(uint64_t feature_mask) +{ + VHOST_FEATURES = VHOST_FEATURES & ~feature_mask; + return 0; +} + +int rte_vhost_feature_enable(uint64_t feature_mask) +{ + if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) { + VHOST_FEATURES = VHOST_FEATURES | feature_mask; + return 0; + } + return -1; +} + +/* + * Register ops so that we can add/remove device to data core. + */ +int +rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops) +{ + notify_ops = ops; + + return 0; +} diff --git a/lib/vhost/rte_vhost/vhost.h b/lib/vhost/rte_vhost/vhost.h new file mode 100644 index 000000000..0c297d4bf --- /dev/null +++ b/lib/vhost/rte_vhost/vhost.h @@ -0,0 +1,294 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
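[Editor's illustrative aside: rte_vhost_feature_disable() and rte_vhost_feature_enable() above adjust the global VHOST_FEATURES mask that is offered during negotiation, so a backend masks out anything it cannot support before any guest connects. The function name and the exact feature bits below are assumptions about one hypothetical backend, not part of this patch.]

/*
 * Sketch only: drop mergeable RX buffers and host TSO from the offered
 * feature set ahead of the first connection.
 */
static void
restrict_supported_features(void)
{
	rte_vhost_feature_disable((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
				  (1ULL << VIRTIO_NET_F_HOST_TSO4) |
				  (1ULL << VIRTIO_NET_F_HOST_TSO6));
}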
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_CDEV_H_ +#define _VHOST_NET_CDEV_H_ +#include +#include +#include +#include +#include +#include + +#include + +#include "rte_virtio_net.h" + +/* Used to indicate that the device is running on a data core */ +#define VIRTIO_DEV_RUNNING 1 + +/* Backend value set by guest. */ +#define VIRTIO_DEV_STOPPED -1 + +#define BUF_VECTOR_MAX 256 + +/** + * Structure contains buffer address, length and descriptor index + * from vring to do scatter RX. + */ +struct buf_vector { + uint64_t buf_addr; + uint32_t buf_len; + uint32_t desc_idx; +}; + +/* + * A structure to hold some fields needed in zero copy code path, + * mainly for associating an mbuf with the right desc_idx. + */ +struct zcopy_mbuf { + struct rte_mbuf *mbuf; + uint32_t desc_idx; + uint16_t in_use; + + TAILQ_ENTRY(zcopy_mbuf) next; +}; +TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf); + +/** + * Structure contains variables relevant to RX/TX virtqueues. + */ +struct vhost_virtqueue { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint32_t size; + + uint16_t last_avail_idx; + uint16_t last_used_idx; +#define VIRTIO_INVALID_EVENTFD (-1) +#define VIRTIO_UNINITIALIZED_EVENTFD (-2) + + /* Backend value to determine if device should started/stopped */ + int backend; + /* Used to notify the guest (trigger interrupt) */ + int callfd; + /* Currently unused as polling mode is enabled */ + int kickfd; + int enabled; + + /* Physical address of used ring, for logging */ + uint64_t log_guest_addr; + + uint16_t nr_zmbuf; + uint16_t zmbuf_size; + uint16_t last_zmbuf_idx; + struct zcopy_mbuf *zmbufs; + struct zcopy_mbuf_list zmbuf_list; + + struct vring_used_elem *shadow_used_ring; + uint16_t shadow_used_idx; +} __rte_cache_aligned; + +/* Old kernels have no such macro defined */ +#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE + #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 +#endif + + +/* + * Make an extra wrapper for VIRTIO_NET_F_MQ and + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are + * introduced since kernel v3.8. 
This makes our + * code buildable for older kernel. + */ +#ifdef VIRTIO_NET_F_MQ + #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX + #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) +#else + #define VHOST_MAX_QUEUE_PAIRS 1 + #define VHOST_SUPPORTS_MQ 0 +#endif + +/* + * Define virtio 1.0 for older kernels + */ +#ifndef VIRTIO_F_VERSION_1 + #define VIRTIO_F_VERSION_1 32 +#endif + +struct guest_page { + uint64_t guest_phys_addr; + uint64_t host_phys_addr; + uint64_t size; +}; + +/** + * Device structure contains all configuration information relating + * to the device. + */ +struct virtio_net { + /* Frontend (QEMU) memory and memory region information */ + struct virtio_memory *mem; + uint64_t features; + uint64_t protocol_features; + int vid; + uint32_t flags; + uint16_t vhost_hlen; + /* to tell if we need broadcast rarp packet */ + rte_atomic16_t broadcast_rarp; + uint32_t virt_qp_nb; + uint32_t num_queues; + int dequeue_zero_copy; + struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; +#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) + char ifname[IF_NAME_SZ]; + uint64_t log_size; + uint64_t log_base; + uint64_t log_addr; + struct ether_addr mac; + + uint32_t nr_guest_pages; + uint32_t max_guest_pages; + struct guest_page *guest_pages; +} __rte_cache_aligned; + +/** + * Information relating to memory regions including offsets to + * addresses in QEMUs memory file. + */ +struct virtio_memory_region { + uint64_t guest_phys_addr; + uint64_t guest_user_addr; + uint64_t host_user_addr; + uint64_t size; + void *mmap_addr; + uint64_t mmap_size; + int fd; +}; + + +/** + * Memory structure includes region and mapping information. + */ +struct virtio_memory { + uint32_t nregions; + struct virtio_memory_region regions[0]; +}; + + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 + +#ifdef RTE_LIBRTE_VHOST_DEBUG +#define VHOST_MAX_PRINT_BUFF 6072 +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define PRINT_PACKET(device, addr, size, header) do { \ + char *pkt_addr = (char *)(addr); \ + unsigned int index; \ + char packet[VHOST_MAX_PRINT_BUFF]; \ + \ + if ((header)) \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \ + else \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \ + for (index = 0; index < (size); index++) { \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \ + "%02hhx ", pkt_addr[index]); \ + } \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ + \ + LOG_DEBUG(VHOST_DATA, "%s", packet); \ +} while (0) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) 
do {} while (0) +#define PRINT_PACKET(device, addr, size, header) do {} while (0) +#endif + +extern uint64_t VHOST_FEATURES; +#define MAX_VHOST_DEVICE 1024 +extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +/* Convert guest physical Address to host virtual address */ +static inline uint64_t __attribute__((always_inline)) +gpa_to_vva(struct virtio_net *dev, uint64_t gpa) +{ + struct virtio_memory_region *reg; + uint32_t i; + + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + if (gpa >= reg->guest_phys_addr && + gpa < reg->guest_phys_addr + reg->size) { + return gpa - reg->guest_phys_addr + + reg->host_user_addr; + } + } + + return 0; +} + +/* Convert guest physical address to host physical address */ +static inline phys_addr_t __attribute__((always_inline)) +gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) +{ + uint32_t i; + struct guest_page *page; + + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + + if (gpa >= page->guest_phys_addr && + gpa + size < page->guest_phys_addr + page->size) { + return gpa - page->guest_phys_addr + + page->host_phys_addr; + } + } + + return 0; +} + +extern struct virtio_net_device_ops const *notify_ops; +struct virtio_net *get_device(int vid); + +int vhost_new_device(void); +void cleanup_device(struct virtio_net *dev, int destroy); +void reset_device(struct virtio_net *dev); +void vhost_destroy_device(int); + +int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx); + +void vhost_set_ifname(int, const char *if_name, unsigned int if_len); +void vhost_enable_dequeue_zero_copy(int vid); + +/* + * Backend-specific cleanup. + * + * TODO: fix it; we have one backend now + */ +void vhost_backend_cleanup(struct virtio_net *dev); + +#endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/vhost/rte_vhost/vhost_user.c b/lib/vhost/rte_vhost/vhost_user.c new file mode 100644 index 000000000..7693af71d --- /dev/null +++ b/lib/vhost/rte_vhost/vhost_user.c @@ -0,0 +1,1042 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
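[Editor's illustrative aside: both translation helpers above return 0 when an address is not covered by any registered region, so callers must treat 0 as a failed lookup rather than a valid mapping. The helper name desc_to_host_ptr below is an assumption for illustration; the pattern itself is the one used throughout virtio_net.c later in this patch.]

/*
 * Sketch only: translate one descriptor's guest physical buffer address
 * and reject a zero result before dereferencing it.
 */
static inline void *
desc_to_host_ptr(struct virtio_net *dev, const struct vring_desc *desc)
{
	uint64_t vva = gpa_to_vva(dev, desc->addr);

	if (vva == 0)
		return NULL;	/* not backed by any guest memory region */

	return (void *)(uintptr_t)vva;
}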
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include +#endif + +#include +#include +#include + +#include "vhost.h" +#include "vhost_user.h" + +static const char *vhost_message_str[VHOST_USER_MAX] = { + [VHOST_USER_NONE] = "VHOST_USER_NONE", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", + [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", + [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", +}; + +static uint64_t +get_blk_size(int fd) +{ + struct stat stat; + int ret; + + ret = fstat(fd, &stat); + return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; +} + +static void +free_mem_region(struct virtio_net *dev) +{ + uint32_t i; + struct virtio_memory_region *reg; + + if (!dev || !dev->mem) + return; + + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + if (reg->host_user_addr) { + munmap(reg->mmap_addr, reg->mmap_size); + close(reg->fd); + } + } +} + +void +vhost_backend_cleanup(struct virtio_net *dev) +{ + if (dev->mem) { + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + } + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + dev->log_addr = 0; + } +} + +/* + * This function just returns success at the moment unless + * the device hasn't been initialised. + */ +static int +vhost_user_set_owner(void) +{ + return 0; +} + +static int +vhost_user_reset_owner(struct virtio_net *dev) +{ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(dev->vid); + } + + cleanup_device(dev, 0); + reset_device(dev); + return 0; +} + +/* + * The features that we support are requested. + */ +static uint64_t +vhost_user_get_features(void) +{ + return VHOST_FEATURES; +} + +/* + * We receive the negotiated features supported by us and the virtio device. 
+ */ +static int +vhost_user_set_features(struct virtio_net *dev, uint64_t features) +{ + if (features & ~VHOST_FEATURES) + return -1; + + dev->features = features; + if (dev->features & + ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { + dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else { + dev->vhost_hlen = sizeof(struct virtio_net_hdr); + } + LOG_DEBUG(VHOST_CONFIG, + "(%d) mergeable RX buffers %s, virtio 1 %s\n", + dev->vid, + (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", + (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); + + return 0; +} + +/* + * The virtio device sends us the size of the descriptor ring. + */ +static int +vhost_user_set_vring_num(struct virtio_net *dev, + VhostUserMsg *msg) +{ + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + + vq->size = msg->payload.state.num; + + if (dev->dequeue_zero_copy) { + vq->nr_zmbuf = 0; + vq->last_zmbuf_idx = 0; + vq->zmbuf_size = vq->size; + vq->zmbufs = rte_zmalloc(NULL, vq->zmbuf_size * + sizeof(struct zcopy_mbuf), 0); + if (vq->zmbufs == NULL) { + RTE_LOG(WARNING, VHOST_CONFIG, + "failed to allocate mem for zero copy; " + "zero copy is force disabled\n"); + dev->dequeue_zero_copy = 0; + } + } + + vq->shadow_used_ring = rte_malloc(NULL, + vq->size * sizeof(struct vring_used_elem), + RTE_CACHE_LINE_SIZE); + if (!vq->shadow_used_ring) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for shadow used ring.\n"); + return -1; + } + + return 0; +} + +/* + * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the + * same numa node as the memory of vring descriptor. + */ +#ifdef RTE_LIBRTE_VHOST_NUMA +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index) +{ + int oldnode, newnode; + struct virtio_net *old_dev; + struct vhost_virtqueue *old_vq, *vq; + int ret; + + /* + * vq is allocated on pairs, we should try to do realloc + * on first queue of one queue pair only. + */ + if (index % VIRTIO_QNUM != 0) + return dev; + + old_dev = dev; + vq = old_vq = dev->virtqueue[index]; + + ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, + MPOL_F_NODE | MPOL_F_ADDR); + + /* check if we need to reallocate vq */ + ret |= get_mempolicy(&oldnode, NULL, 0, old_vq, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get vq numa information.\n"); + return dev; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate vq from %d to %d node\n", oldnode, newnode); + vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0, + newnode); + if (!vq) + return dev; + + memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM); + rte_free(old_vq); + } + + /* check if we need to reallocate dev */ + ret = get_mempolicy(&oldnode, NULL, 0, old_dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get dev numa information.\n"); + goto out; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate dev from %d to %d node\n", + oldnode, newnode); + dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode); + if (!dev) { + dev = old_dev; + goto out; + } + + memcpy(dev, old_dev, sizeof(*dev)); + rte_free(old_dev); + } + +out: + dev->virtqueue[index] = vq; + dev->virtqueue[index + 1] = vq + 1; + vhost_devices[dev->vid] = dev; + + return dev; +} +#else +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index __rte_unused) +{ + return dev; +} +#endif + +/* + * Converts QEMU virtual address to Vhost virtual address. 
This function is + * used to convert the ring addresses to our address space. + */ +static uint64_t +qva_to_vva(struct virtio_net *dev, uint64_t qva) +{ + struct virtio_memory_region *reg; + uint32_t i; + + /* Find the region where the address lives. */ + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + + if (qva >= reg->guest_user_addr && + qva < reg->guest_user_addr + reg->size) { + return qva - reg->guest_user_addr + + reg->host_user_addr; + } + } + + return 0; +} + +/* + * The virtio device sends us the desc, used and avail ring addresses. + * This function then converts these to our address space. + */ +static int +vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg) +{ + struct vhost_virtqueue *vq; + + if (dev->mem == NULL) + return -1; + + /* addr->index refers to the queue index. The txq 1, rxq is 0. */ + vq = dev->virtqueue[msg->payload.addr.index]; + + /* The addresses are converted from QEMU virtual to Vhost virtual. */ + vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, + msg->payload.addr.desc_user_addr); + if (vq->desc == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find desc ring address.\n", + dev->vid); + return -1; + } + + dev = numa_realloc(dev, msg->payload.addr.index); + vq = dev->virtqueue[msg->payload.addr.index]; + + vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, + msg->payload.addr.avail_user_addr); + if (vq->avail == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find avail ring address.\n", + dev->vid); + return -1; + } + + vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, + msg->payload.addr.used_user_addr); + if (vq->used == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find used ring address.\n", + dev->vid); + return -1; + } + + if (vq->last_used_idx != vq->used->idx) { + RTE_LOG(WARNING, VHOST_CONFIG, + "last_used_idx (%u) and vq->used->idx (%u) mismatches; " + "some packets maybe resent for Tx and dropped for Rx\n", + vq->last_used_idx, vq->used->idx); + vq->last_used_idx = vq->used->idx; + vq->last_avail_idx = vq->used->idx; + } + + vq->log_guest_addr = msg->payload.addr.log_guest_addr; + + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + dev->vid, vq->desc); + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + dev->vid, vq->avail); + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + dev->vid, vq->used); + LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + dev->vid, vq->log_guest_addr); + + return 0; +} + +/* + * The virtio device sends us the available ring last used index. 
+ */ +static int +vhost_user_set_vring_base(struct virtio_net *dev, + VhostUserMsg *msg) +{ + dev->virtqueue[msg->payload.state.index]->last_used_idx = msg->payload.state.num; + dev->virtqueue[msg->payload.state.index]->last_avail_idx = msg->payload.state.num; + + return 0; +} + +static void +add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, + uint64_t host_phys_addr, uint64_t size) +{ + struct guest_page *page, *last_page; + + if (dev->nr_guest_pages == dev->max_guest_pages && + dev->nr_guest_pages > 0) { + dev->max_guest_pages *= 2; + dev->guest_pages = realloc(dev->guest_pages, + dev->max_guest_pages * sizeof(*page)); + } + + if (dev->nr_guest_pages > 0) { + last_page = &dev->guest_pages[dev->nr_guest_pages - 1]; + /* merge if the two pages are continuous */ + if (host_phys_addr == last_page->host_phys_addr + + last_page->size) { + last_page->size += size; + return; + } + } + + page = &dev->guest_pages[dev->nr_guest_pages++]; + page->guest_phys_addr = guest_phys_addr; + page->host_phys_addr = host_phys_addr; + page->size = size; +} + +static void +add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg, + uint64_t page_size) +{ + uint64_t reg_size = reg->size; + uint64_t host_user_addr = reg->host_user_addr; + uint64_t guest_phys_addr = reg->guest_phys_addr; + uint64_t host_phys_addr; + uint64_t size; + + host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr); + size = page_size - (guest_phys_addr & (page_size - 1)); + size = RTE_MIN(size, reg_size); + + add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + host_user_addr += size; + guest_phys_addr += size; + reg_size -= size; + + while (reg_size > 0) { + host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t) + host_user_addr); + add_one_guest_page(dev, guest_phys_addr, host_phys_addr, + page_size); + + host_user_addr += page_size; + guest_phys_addr += page_size; + reg_size -= page_size; + } +} + +#ifdef RTE_LIBRTE_VHOST_DEBUG +/* TODO: enable it only in debug mode? */ +static void +dump_guest_pages(struct virtio_net *dev) +{ + uint32_t i; + struct guest_page *page; + + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + + RTE_LOG(INFO, VHOST_CONFIG, + "guest physical page region %u\n" + "\t guest_phys_addr: %" PRIx64 "\n" + "\t host_phys_addr : %" PRIx64 "\n" + "\t size : %" PRIx64 "\n", + i, + page->guest_phys_addr, + page->host_phys_addr, + page->size); + } +} +#else +#define dump_guest_pages(dev) +#endif + +static int +vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct VhostUserMemory memory = pmsg->payload.memory; + struct virtio_memory_region *reg; + void *mmap_addr; + uint64_t mmap_size; + uint64_t mmap_offset; + uint64_t alignment; + uint32_t i; + int fd; + + /* Remove from the data plane. 
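[Editor's illustrative aside: the guest_pages table filled in by add_one_guest_page()/add_guest_pages() above is what backs gpa_to_hpa() from vhost.h; contiguous host-physical ranges are merged so that a single lookup can cover a whole buffer. The wrapper below, including its name translate_for_dma, is an assumed usage sketch of how a storage backend might obtain a DMA-capable address, not code from this patch.]

/*
 * Sketch only: gpa_to_hpa() returns 0 when the range is unmapped or spans
 * more than one tracked page region, so treat 0 as failure.
 */
static int
translate_for_dma(struct virtio_net *dev, uint64_t gpa, uint64_t len,
		  uint64_t *dma_addr)
{
	uint64_t hpa = gpa_to_hpa(dev, gpa, len);

	if (hpa == 0)
		return -1;

	*dma_addr = hpa;
	return 0;
}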
*/ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(dev->vid); + } + + if (dev->mem) { + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + } + + dev->nr_guest_pages = 0; + if (!dev->guest_pages) { + dev->max_guest_pages = 8; + dev->guest_pages = malloc(dev->max_guest_pages * + sizeof(struct guest_page)); + } + + dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) + + sizeof(struct virtio_memory_region) * memory.nregions, 0); + if (dev->mem == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to allocate memory for dev->mem\n", + dev->vid); + return -1; + } + dev->mem->nregions = memory.nregions; + + for (i = 0; i < memory.nregions; i++) { + fd = pmsg->fds[i]; + reg = &dev->mem->regions[i]; + + reg->guest_phys_addr = memory.regions[i].guest_phys_addr; + reg->guest_user_addr = memory.regions[i].userspace_addr; + reg->size = memory.regions[i].memory_size; + reg->fd = fd; + + mmap_offset = memory.regions[i].mmap_offset; + mmap_size = reg->size + mmap_offset; + + /* mmap() without flag of MAP_ANONYMOUS, should be called + * with length argument aligned with hugepagesz at older + * longterm version Linux, like 2.6.32 and 3.2.72, or + * mmap() will fail with EINVAL. + * + * to avoid failure, make sure in caller to keep length + * aligned. + */ + alignment = get_blk_size(fd); + if (alignment == (uint64_t)-1) { + RTE_LOG(ERR, VHOST_CONFIG, + "couldn't get hugepage size through fstat\n"); + goto err_mmap; + } + mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); + + mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + + if (mmap_addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap region %u failed.\n", i); + goto err_mmap; + } + + reg->mmap_addr = mmap_addr; + reg->mmap_size = mmap_size; + reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + + mmap_offset; + + add_guest_pages(dev, reg, alignment); + + RTE_LOG(INFO, VHOST_CONFIG, + "guest memory region %u, size: 0x%" PRIx64 "\n" + "\t guest physical addr: 0x%" PRIx64 "\n" + "\t guest virtual addr: 0x%" PRIx64 "\n" + "\t host virtual addr: 0x%" PRIx64 "\n" + "\t mmap addr : 0x%" PRIx64 "\n" + "\t mmap size : 0x%" PRIx64 "\n" + "\t mmap align: 0x%" PRIx64 "\n" + "\t mmap off : 0x%" PRIx64 "\n", + i, reg->size, + reg->guest_phys_addr, + reg->guest_user_addr, + reg->host_user_addr, + (uint64_t)(uintptr_t)mmap_addr, + mmap_size, + alignment, + mmap_offset); + } + + dump_guest_pages(dev); + + return 0; + +err_mmap: + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + return -1; +} + +static int +vq_is_ready(struct vhost_virtqueue *vq) +{ + return vq && vq->desc && + vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && + vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; +} + +static int +virtio_is_ready(struct virtio_net *dev) +{ + struct vhost_virtqueue *vq; + uint32_t i; + + for (i = 0; i < dev->num_queues; i++) { + vq = dev->virtqueue[i]; + + if (!vq_is_ready(vq)) { + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is not ready for processing.\n"); + return 0; + } + } + + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is now ready for processing.\n"); + return 1; +} + +static void +vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct vhost_virtqueue *vq; + uint32_t cur_qp_idx; + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + 
RTE_LOG(INFO, VHOST_CONFIG, + "vring call idx:%d file:%d\n", file.index, file.fd); + + if (file.index + 1 > dev->num_queues) { + dev->num_queues = file.index + 1; + } + + /* + * FIXME: VHOST_SET_VRING_CALL is the first per-vring message + * we get, so we do vring queue pair allocation here. + */ + cur_qp_idx = file.index / VIRTIO_QNUM; + if (cur_qp_idx + 1 > dev->virt_qp_nb) { + if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0) + return; + } + + vq = dev->virtqueue[file.index]; + assert(vq != NULL); + + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = file.fd; + + if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) { + notify_ops->new_device(dev->vid); + } +} + +/* + * In vhost-user, when we receive kick message, will test whether virtio + * device is ready for packet processing. + */ +static void +vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct vhost_virtqueue *vq; + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring kick idx:%d file:%d\n", file.index, file.fd); + + vq = dev->virtqueue[file.index]; + if (vq->kickfd >= 0) + close(vq->kickfd); + vq->kickfd = file.fd; + + if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) { + if (dev->dequeue_zero_copy) { + RTE_LOG(INFO, VHOST_CONFIG, + "dequeue zero copy is enabled\n"); + } + + if (notify_ops->new_device(dev->vid) == 0) + dev->flags |= VIRTIO_DEV_RUNNING; + } +} + +static void +free_zmbufs(struct vhost_virtqueue *vq) +{ + struct zcopy_mbuf *zmbuf, *next; + + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); + + rte_pktmbuf_free(zmbuf->mbuf); + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + } + + rte_free(vq->zmbufs); +} + +/* + * when virtio is stopped, qemu will send us the GET_VRING_BASE message. + */ +static int +vhost_user_get_vring_base(struct virtio_net *dev, + VhostUserMsg *msg) +{ + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + + /* We have to stop the queue (virtio) if it is running. */ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(dev->vid); + } + + /* Here we are safe to get the last used index */ + msg->payload.state.num = vq->last_used_idx; + + RTE_LOG(INFO, VHOST_CONFIG, + "vring base idx:%d file:%d\n", msg->payload.state.index, msg->payload.state.num); + /* + * Based on current qemu vhost-user implementation, this message is + * sent and only sent in vhost_vring_stop. + * TODO: cleanup the vring, it isn't usable since here. + */ + if (vq->kickfd >= 0) + close(vq->kickfd); + + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + + if (dev->dequeue_zero_copy) + free_zmbufs(vq); + rte_free(vq->shadow_used_ring); + vq->shadow_used_ring = NULL; + + return 0; +} + +/* + * when virtio queues are ready to work, qemu will send us to + * enable the virtio queue pair. 
+ */ +static int +vhost_user_set_vring_enable(struct virtio_net *dev, + VhostUserMsg *msg) +{ + int enable = (int)msg->payload.state.num; + + RTE_LOG(INFO, VHOST_CONFIG, + "set queue enable: %d to qp idx: %d\n", + enable, msg->payload.state.index); + + if (notify_ops->vring_state_changed) + notify_ops->vring_state_changed(dev->vid, msg->payload.state.index, enable); + + dev->virtqueue[msg->payload.state.index]->enabled = enable; + + return 0; +} + +static void +vhost_user_set_protocol_features(struct virtio_net *dev, + uint64_t protocol_features) +{ + if (protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) + return; + + dev->protocol_features = protocol_features; +} + +static int +vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + int fd = msg->fds[0]; + uint64_t size, off; + void *addr; + + if (fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); + return -1; + } + + if (msg->size != sizeof(VhostUserLog)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid log base msg size: %"PRId32" != %d\n", + msg->size, (int)sizeof(VhostUserLog)); + return -1; + } + + size = msg->payload.log.mmap_size; + off = msg->payload.log.mmap_offset; + RTE_LOG(INFO, VHOST_CONFIG, + "log mmap size: %"PRId64", offset: %"PRId64"\n", + size, off); + + /* + * mmap from 0 to workaround a hugepage mmap bug: mmap will + * fail when offset is not page size aligned. + */ + addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); + return -1; + } + + /* + * Free previously mapped log memory on occasionally + * multiple VHOST_USER_SET_LOG_BASE. + */ + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + } + dev->log_addr = (uint64_t)(uintptr_t)addr; + dev->log_base = dev->log_addr + off; + dev->log_size = size; + + return 0; +} + +/* + * An rarp packet is constructed and broadcasted to notify switches about + * the new location of the migrated VM, so that packets from outside will + * not be lost after migration. + * + * However, we don't actually "send" a rarp packet here, instead, we set + * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. + */ +static int +vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + uint8_t *mac = (uint8_t *)&msg->payload.u64; + + RTE_LOG(DEBUG, VHOST_CONFIG, + ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + memcpy(dev->mac.addr_bytes, mac, 6); + + /* + * Set the flag to inject a RARP broadcast packet at + * rte_vhost_dequeue_burst(). + * + * rte_smp_wmb() is for making sure the mac is copied + * before the flag is set. + */ + rte_smp_wmb(); + rte_atomic16_set(&dev->broadcast_rarp, 1); + + return 0; +} + +/* return bytes# of read on success or negative val on failure. 
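[Editor's illustrative aside: the log area mapped by vhost_user_set_log_base() above is a bitmap over guest memory in which one bit covers one 4 KiB page; vhost_log_page() in virtio_net.c later in this patch performs exactly this indexing. The helper name log_mark_dirty below is an assumption for illustration.]

/*
 * Sketch of the dirty-log indexing, assuming the 4 KiB VHOST_LOG_PAGE used
 * by virtio_net.c: bit N of the bitmap covers guest physical page N.
 */
static void
log_mark_dirty(uint8_t *log_base, uint64_t guest_phys_addr)
{
	uint64_t page = guest_phys_addr / 4096;

	log_base[page / 8] |= (uint8_t)(1 << (page % 8));
}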
*/ +static int +read_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, + msg->fds, VHOST_MEMORY_MAX_NREGIONS); + if (ret <= 0) + return ret; + + if (msg && msg->size) { + if (msg->size > sizeof(msg->payload)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid msg size: %d\n", msg->size); + return -1; + } + ret = read(sockfd, &msg->payload, msg->size); + if (ret <= 0) + return ret; + if (ret != (int)msg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "read control message failed\n"); + return -1; + } + } + + return ret; +} + +static int +send_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + if (!msg) + return 0; + + msg->flags &= ~VHOST_USER_VERSION_MASK; + msg->flags |= VHOST_USER_VERSION; + msg->flags |= VHOST_USER_REPLY_MASK; + + ret = send_fd_message(sockfd, (char *)msg, + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + + return ret; +} + +int +vhost_user_msg_handler(int vid, int fd) +{ + struct virtio_net *dev; + struct VhostUserMsg msg; + int ret; + + dev = get_device(vid); + if (dev == NULL) + return -1; + + ret = read_vhost_message(fd, &msg); + if (ret <= 0 || msg.request >= VHOST_USER_MAX) { + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read message failed\n"); + else if (ret == 0) + RTE_LOG(INFO, VHOST_CONFIG, + "vhost peer closed\n"); + else + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read incorrect message\n"); + + return -1; + } + + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", + vhost_message_str[msg.request]); + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + msg.payload.u64 = vhost_user_get_features(); + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_FEATURES: + vhost_user_set_features(dev, msg.payload.u64); + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: + msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + vhost_user_set_protocol_features(dev, msg.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + vhost_user_set_owner(); + break; + case VHOST_USER_RESET_OWNER: + vhost_user_reset_owner(dev); + break; + + case VHOST_USER_SET_MEM_TABLE: + vhost_user_set_mem_table(dev, &msg); + break; + + case VHOST_USER_SET_LOG_BASE: + vhost_user_set_log_base(dev, &msg); + + /* it needs a reply */ + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_LOG_FD: + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); + break; + + case VHOST_USER_SET_VRING_NUM: + vhost_user_set_vring_num(dev, &msg); + break; + case VHOST_USER_SET_VRING_ADDR: + vhost_user_set_vring_addr(dev, &msg); + break; + case VHOST_USER_SET_VRING_BASE: + vhost_user_set_vring_base(dev, &msg); + break; + + case VHOST_USER_GET_VRING_BASE: + vhost_user_get_vring_base(dev, &msg); + msg.size = sizeof(msg.payload.state); + send_vhost_message(fd, &msg); + break; + + case VHOST_USER_SET_VRING_KICK: + vhost_user_set_vring_kick(dev, &msg); + break; + case VHOST_USER_SET_VRING_CALL: + vhost_user_set_vring_call(dev, &msg); + break; + + case VHOST_USER_SET_VRING_ERR: + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); + break; + + case VHOST_USER_GET_QUEUE_NUM: + msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + + case VHOST_USER_SET_VRING_ENABLE: + 
vhost_user_set_vring_enable(dev, &msg); + break; + case VHOST_USER_SEND_RARP: + vhost_user_send_rarp(dev, &msg); + break; + + default: + break; + + } + + return 0; +} diff --git a/lib/vhost/rte_vhost/vhost_user.h b/lib/vhost/rte_vhost/vhost_user.h new file mode 100644 index 000000000..ba78d3268 --- /dev/null +++ b/lib/vhost/rte_vhost/vhost_user.h @@ -0,0 +1,128 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include +#include + +#include "rte_virtio_net.h" + +/* refer to hw/virtio/vhost-user.c */ + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_USER_PROTOCOL_F_RARP 2 + +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ + (1ULL << VHOST_USER_PROTOCOL_F_RARP)) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)) VhostUserMsg; + +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + + +/* vhost_user.c */ +int vhost_user_msg_handler(int vid, int fd); + +/* socket.c */ +int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); +int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); + +#endif diff --git a/lib/vhost/rte_vhost/virtio_net.c b/lib/vhost/rte_vhost/virtio_net.c new file mode 100644 index 000000000..e0df0b972 --- /dev/null +++ b/lib/vhost/rte_vhost/virtio_net.c @@ -0,0 +1,1186 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
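[Editor's illustrative aside: each VhostUserRequest above selects one member of the payload union. read_vhost_message() only bounds msg.size by the size of the whole union; the sketch below shows the per-request payload each message implies, which a stricter consumer could validate against. The function name expected_payload_size is an assumption, not part of this patch.]

/*
 * Sketch only: expected payload for the fixed-size vhost-user requests.
 */
static uint32_t
expected_payload_size(VhostUserRequest req)
{
	switch (req) {
	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_GET_VRING_BASE:
	case VHOST_USER_SET_VRING_ENABLE:
		return sizeof(struct vhost_vring_state);
	case VHOST_USER_SET_VRING_ADDR:
		return sizeof(struct vhost_vring_addr);
	case VHOST_USER_SET_LOG_BASE:
		return sizeof(VhostUserLog);
	case VHOST_USER_SET_FEATURES:
	case VHOST_USER_SET_PROTOCOL_FEATURES:
	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR:
		return sizeof(uint64_t);
	default:
		return 0;	/* variable-sized (SET_MEM_TABLE) or no payload */
	}
}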
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost.h" + +#define MAX_PKT_BURST 32 +#define VHOST_LOG_PAGE 4096 + +static inline void __attribute__((always_inline)) +vhost_log_page(uint8_t *log_base, uint64_t page) +{ + log_base[page / 8] |= 1 << (page % 8); +} + +static inline void __attribute__((always_inline)) +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + /* To make sure guest memory updates are committed before logging */ + rte_smp_wmb(); + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + page += 1; + } +} + +static inline void __attribute__((always_inline)) +vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_write(dev, vq->log_guest_addr + offset, len); +} + +static bool +is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb) +{ + return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; +} + +static inline void __attribute__((always_inline)) +do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint16_t to, uint16_t from, uint16_t size) +{ + rte_memcpy(&vq->used->ring[to], + &vq->shadow_used_ring[from], + size * sizeof(struct vring_used_elem)); + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[to]), + size * sizeof(struct vring_used_elem)); +} + +static inline void __attribute__((always_inline)) +flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + uint16_t used_idx = vq->last_used_idx & (vq->size - 1); + + if (used_idx + vq->shadow_used_idx <= vq->size) { + do_flush_shadow_used_ring(dev, vq, used_idx, 0, + vq->shadow_used_idx); + } else { + uint16_t size; + + /* update used ring interval [used_idx, vq->size] */ + size = vq->size - used_idx; + do_flush_shadow_used_ring(dev, vq, used_idx, 0, size); + + /* update the left half used ring interval [0, left_size] */ + do_flush_shadow_used_ring(dev, vq, 0, size, + vq->shadow_used_idx - size); + } + vq->last_used_idx += vq->shadow_used_idx; + + rte_smp_wmb(); + + *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx; + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, 
idx), + sizeof(vq->used->idx)); +} + +static inline void __attribute__((always_inline)) +update_shadow_used_ring(struct vhost_virtqueue *vq, + uint16_t desc_idx, uint16_t len) +{ + uint16_t i = vq->shadow_used_idx++; + + vq->shadow_used_ring[i].id = desc_idx; + vq->shadow_used_ring[i].len = len; +} + +static void +virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) +{ + if (m_buf->ol_flags & PKT_TX_L4_MASK) { + net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; + + switch (m_buf->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + net_hdr->csum_offset = (offsetof(struct tcp_hdr, + cksum)); + break; + case PKT_TX_UDP_CKSUM: + net_hdr->csum_offset = (offsetof(struct udp_hdr, + dgram_cksum)); + break; + case PKT_TX_SCTP_CKSUM: + net_hdr->csum_offset = (offsetof(struct sctp_hdr, + cksum)); + break; + } + } + + if (m_buf->ol_flags & PKT_TX_TCP_SEG) { + if (m_buf->ol_flags & PKT_TX_IPV4) + net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else + net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + net_hdr->gso_size = m_buf->tso_segsz; + net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + + m_buf->l4_len; + } +} + +static inline void +copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, + struct virtio_net_hdr_mrg_rxbuf hdr) +{ + if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) + *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; + else + *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; +} + +static inline int __attribute__((always_inline)) +copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, + struct rte_mbuf *m, uint16_t desc_idx, uint32_t size) +{ + uint32_t desc_avail, desc_offset; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; + struct vring_desc *desc; + uint64_t desc_addr; + struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; + + desc = &descs[desc_idx]; + desc_addr = gpa_to_vva(dev, desc->addr); + /* + * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid + * performance issue with some versions of gcc (4.8.4 and 5.3.0) which + * otherwise stores offset on the stack instead of in a register. 
+ */ + if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) + return -1; + + rte_prefetch0((void *)(uintptr_t)desc_addr); + + virtio_enqueue_offload(m, &virtio_hdr.hdr); + copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); + vhost_log_write(dev, desc->addr, dev->vhost_hlen); + PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); + + desc_offset = dev->vhost_hlen; + desc_avail = desc->len - dev->vhost_hlen; + + mbuf_avail = rte_pktmbuf_data_len(m); + mbuf_offset = 0; + while (mbuf_avail != 0 || m->next != NULL) { + /* done with current mbuf, fetch next */ + if (mbuf_avail == 0) { + m = m->next; + + mbuf_offset = 0; + mbuf_avail = rte_pktmbuf_data_len(m); + } + + /* done with current desc buf, fetch next */ + if (desc_avail == 0) { + if ((desc->flags & VRING_DESC_F_NEXT) == 0) { + /* Room in vring buffer is not enough */ + return -1; + } + if (unlikely(desc->next >= size)) + return -1; + + desc = &descs[desc->next]; + desc_addr = gpa_to_vva(dev, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + desc_offset = 0; + desc_avail = desc->len; + } + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); + vhost_log_write(dev, desc->addr + desc_offset, cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + } + + return 0; +} + +/** + * This function adds buffers to the virtio devices RX virtqueue. Buffers can + * be received from the physical port or from another virtio device. A packet + * count is returned to indicate the number of packets that are succesfully + * added to the RX queue. This function works when the mbuf is scattered, but + * it doesn't support the mergeable feature. + */ +static inline uint32_t __attribute__((always_inline)) +virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + uint16_t avail_idx, free_entries, start_idx; + uint16_t desc_indexes[MAX_PKT_BURST]; + struct vring_desc *descs; + uint16_t used_idx; + uint32_t i, sz; + + LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + start_idx = vq->last_used_idx; + free_entries = avail_idx - start_idx; + count = RTE_MIN(count, free_entries); + count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); + if (count == 0) + return 0; + + LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", + dev->vid, start_idx, start_idx + count); + + /* Retrieve all of the desc indexes first to avoid caching issues. 
*/ + rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]); + for (i = 0; i < count; i++) { + used_idx = (start_idx + i) & (vq->size - 1); + desc_indexes[i] = vq->avail->ring[used_idx]; + vq->used->ring[used_idx].id = desc_indexes[i]; + vq->used->ring[used_idx].len = pkts[i]->pkt_len + + dev->vhost_hlen; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + } + + rte_prefetch0(&vq->desc[desc_indexes[0]]); + for (i = 0; i < count; i++) { + uint16_t desc_idx = desc_indexes[i]; + int err; + + if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) { + descs = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev, + vq->desc[desc_idx].addr); + if (unlikely(!descs)) { + count = i; + break; + } + + desc_idx = 0; + sz = vq->desc[desc_idx].len / sizeof(*descs); + } else { + descs = vq->desc; + sz = vq->size; + } + + err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz); + if (unlikely(err)) { + used_idx = (start_idx + i) & (vq->size - 1); + vq->used->ring[used_idx].len = dev->vhost_hlen; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + } + + if (i + 1 < count) + rte_prefetch0(&vq->desc[desc_indexes[i+1]]); + } + + rte_smp_wmb(); + + *(volatile uint16_t *)&vq->used->idx += count; + vq->last_used_idx += count; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); + + /* flush used->idx update before we read avail->flags. */ + rte_mb(); + + /* Kick the guest if necessary. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) + && (vq->callfd >= 0)) + eventfd_write(vq->callfd, (eventfd_t)1); + return count; +} + +static inline int __attribute__((always_inline)) +fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t avail_idx, uint32_t *vec_idx, + struct buf_vector *buf_vec, uint16_t *desc_chain_head, + uint16_t *desc_chain_len) +{ + uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; + uint32_t vec_id = *vec_idx; + uint32_t len = 0; + struct vring_desc *descs = vq->desc; + + *desc_chain_head = idx; + + if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { + descs = (struct vring_desc *)(uintptr_t) + gpa_to_vva(dev, vq->desc[idx].addr); + if (unlikely(!descs)) + return -1; + + idx = 0; + } + + while (1) { + if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) + return -1; + + len += descs[idx].len; + buf_vec[vec_id].buf_addr = descs[idx].addr; + buf_vec[vec_id].buf_len = descs[idx].len; + buf_vec[vec_id].desc_idx = idx; + vec_id++; + + if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) + break; + + idx = descs[idx].next; + } + + *desc_chain_len = len; + *vec_idx = vec_id; + + return 0; +} + +/* + * Returns -1 on fail, 0 on success + */ +static inline int +reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t size, struct buf_vector *buf_vec, + uint16_t *num_buffers, uint16_t avail_head) +{ + uint16_t cur_idx; + uint32_t vec_idx = 0; + uint16_t tries = 0; + + uint16_t head_idx = 0; + uint16_t len = 0; + + *num_buffers = 0; + cur_idx = vq->last_avail_idx; + + while (size > 0) { + if (unlikely(cur_idx == avail_head)) + return -1; + + if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec, + &head_idx, &len) < 0)) + return -1; + len = RTE_MIN(len, size); + update_shadow_used_ring(vq, head_idx, len); + size -= len; + + cur_idx++; + tries++; + *num_buffers += 1; + + /* + * if we tried all available ring items, and still + * can't get enough buf, it means 
something abnormal + * happened. + */ + if (unlikely(tries >= vq->size)) + return -1; + } + + return 0; +} + +static inline int __attribute__((always_inline)) +copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, + struct buf_vector *buf_vec, uint16_t num_buffers) +{ + struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; + uint32_t vec_idx = 0; + uint64_t desc_addr; + uint32_t mbuf_offset, mbuf_avail; + uint32_t desc_offset, desc_avail; + uint32_t cpy_len; + uint64_t hdr_addr, hdr_phys_addr; + struct rte_mbuf *hdr_mbuf; + + if (unlikely(m == NULL)) + return -1; + + desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); + if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) + return -1; + + hdr_mbuf = m; + hdr_addr = desc_addr; + hdr_phys_addr = buf_vec[vec_idx].buf_addr; + rte_prefetch0((void *)(uintptr_t)hdr_addr); + + virtio_hdr.num_buffers = num_buffers; + LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + dev->vid, num_buffers); + + desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; + desc_offset = dev->vhost_hlen; + + mbuf_avail = rte_pktmbuf_data_len(m); + mbuf_offset = 0; + while (mbuf_avail != 0 || m->next != NULL) { + /* done with current desc buf, get the next one */ + if (desc_avail == 0) { + vec_idx++; + desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); + if (unlikely(!desc_addr)) + return -1; + + /* Prefetch buffer address. */ + rte_prefetch0((void *)(uintptr_t)desc_addr); + desc_offset = 0; + desc_avail = buf_vec[vec_idx].buf_len; + } + + /* done with current mbuf, get the next one */ + if (mbuf_avail == 0) { + m = m->next; + + mbuf_offset = 0; + mbuf_avail = rte_pktmbuf_data_len(m); + } + + if (hdr_addr) { + virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr); + copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr); + vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen); + PRINT_PACKET(dev, (uintptr_t)hdr_addr, + dev->vhost_hlen, 0); + + hdr_addr = 0; + } + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); + vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset, + cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + } + + return 0; +} + +static inline uint32_t __attribute__((always_inline)) +virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + uint32_t pkt_idx = 0; + uint16_t num_buffers; + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint16_t avail_head; + + LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + + count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); + if (count == 0) + return 0; + + rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); + + vq->shadow_used_idx = 0; + avail_head = *((volatile uint16_t *)&vq->avail->idx); + for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { + uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; + + if (unlikely(reserve_avail_buf_mergeable(dev, vq, + pkt_len, buf_vec, &num_buffers, + avail_head) < 0)) { + LOG_DEBUG(VHOST_DATA, + "(%d) failed to 
get enough desc from vring\n", + dev->vid); + vq->shadow_used_idx -= num_buffers; + break; + } + + LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + dev->vid, vq->last_avail_idx, + vq->last_avail_idx + num_buffers); + + if (pkt_len > 0 && + copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx], buf_vec, num_buffers) < 0) { + vq->shadow_used_idx -= num_buffers; + break; + } + + vq->last_avail_idx += num_buffers; + } + + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring(dev, vq); + + /* flush used->idx update before we read avail->flags. */ + rte_mb(); + + /* Kick the guest if necessary. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) + && (vq->callfd >= 0)) + eventfd_write(vq->callfd, (eventfd_t)1); + } + + return pkt_idx; +} + +uint16_t +rte_vhost_enqueue_burst(int vid, uint16_t queue_id, + struct rte_mbuf **pkts, uint16_t count) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return 0; + + if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) + return virtio_dev_merge_rx(dev, queue_id, pkts, count); + else + return virtio_dev_rx(dev, queue_id, pkts, count); +} + +static inline bool +virtio_net_with_host_offload(struct virtio_net *dev) +{ + if (dev->features & + (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN | + VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 | + VIRTIO_NET_F_HOST_UFO)) + return true; + + return false; +} + +static void +parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) +{ + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + void *l3_hdr = NULL; + struct ether_hdr *eth_hdr; + uint16_t ethertype; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + m->l2_len = sizeof(struct ether_hdr); + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); + + if (ethertype == ETHER_TYPE_VLAN) { + struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + + m->l2_len += sizeof(struct vlan_hdr); + ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); + } + + l3_hdr = (char *)eth_hdr + m->l2_len; + + switch (ethertype) { + case ETHER_TYPE_IPv4: + ipv4_hdr = (struct ipv4_hdr *)l3_hdr; + *l4_proto = ipv4_hdr->next_proto_id; + m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4; + *l4_hdr = (char *)l3_hdr + m->l3_len; + m->ol_flags |= PKT_TX_IPV4; + break; + case ETHER_TYPE_IPv6: + ipv6_hdr = (struct ipv6_hdr *)l3_hdr; + *l4_proto = ipv6_hdr->proto; + m->l3_len = sizeof(struct ipv6_hdr); + *l4_hdr = (char *)l3_hdr + m->l3_len; + m->ol_flags |= PKT_TX_IPV6; + break; + default: + m->l3_len = 0; + *l4_proto = 0; + break; + } +} + +static inline void __attribute__((always_inline)) +vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) +{ + uint16_t l4_proto = 0; + void *l4_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + + if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) + return; + + parse_ethernet(m, &l4_proto, &l4_hdr); + if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->csum_start == (m->l2_len + m->l3_len)) { + switch (hdr->csum_offset) { + case (offsetof(struct tcp_hdr, cksum)): + if (l4_proto == IPPROTO_TCP) + m->ol_flags |= PKT_TX_TCP_CKSUM; + break; + case (offsetof(struct udp_hdr, dgram_cksum)): + if (l4_proto == IPPROTO_UDP) + m->ol_flags |= PKT_TX_UDP_CKSUM; + break; + case (offsetof(struct sctp_hdr, cksum)): + if (l4_proto == IPPROTO_SCTP) + m->ol_flags |= PKT_TX_SCTP_CKSUM; + break; + default: + break; + } + } + } + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: 
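+		/*
+		 * The guest requested TCP segmentation offload: mark the mbuf
+		 * for TSO and recover the MSS (gso_size) and the TCP header
+		 * length from the virtio-net header and the TCP data offset.
+		 */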
+ if (l4_hdr == NULL) { + RTE_LOG(ERR, VHOST_DATA, "l4_hdr is NULL\n"); + break; + } + tcp_hdr = (struct tcp_hdr *)l4_hdr; + m->ol_flags |= PKT_TX_TCP_SEG; + m->tso_segsz = hdr->gso_size; + m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; + break; + default: + RTE_LOG(WARNING, VHOST_DATA, + "unsupported gso type %u.\n", hdr->gso_type); + break; + } + } +} + +#define RARP_PKT_SIZE 64 + +static int +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac) +{ + struct ether_hdr *eth_hdr; + struct arp_hdr *rarp; + + if (rarp_mbuf->buf_len < 64) { + RTE_LOG(WARNING, VHOST_DATA, + "failed to make RARP; mbuf size too small %u (< %d)\n", + rarp_mbuf->buf_len, RARP_PKT_SIZE); + return -1; + } + + /* Ethernet header. */ + eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0); + memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN); + ether_addr_copy(mac, ð_hdr->s_addr); + eth_hdr->ether_type = htons(ETHER_TYPE_RARP); + + /* RARP header. */ + rarp = (struct arp_hdr *)(eth_hdr + 1); + rarp->arp_hrd = htons(ARP_HRD_ETHER); + rarp->arp_pro = htons(ETHER_TYPE_IPv4); + rarp->arp_hln = ETHER_ADDR_LEN; + rarp->arp_pln = 4; + rarp->arp_op = htons(ARP_OP_REVREQUEST); + + ether_addr_copy(mac, &rarp->arp_data.arp_sha); + ether_addr_copy(mac, &rarp->arp_data.arp_tha); + memset(&rarp->arp_data.arp_sip, 0x00, 4); + memset(&rarp->arp_data.arp_tip, 0x00, 4); + + rarp_mbuf->pkt_len = rarp_mbuf->data_len = RARP_PKT_SIZE; + + return 0; +} + +static inline void __attribute__((always_inline)) +put_zmbuf(struct zcopy_mbuf *zmbuf) +{ + zmbuf->in_use = 0; +} + +static inline int __attribute__((always_inline)) +copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, + uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx, + struct rte_mempool *mbuf_pool) +{ + struct vring_desc *desc; + uint64_t desc_addr; + uint32_t desc_avail, desc_offset; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; + struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr *hdr = NULL; + /* A counter to avoid desc dead loop chain */ + uint32_t nr_desc = 1; + + desc = &descs[desc_idx]; + if (unlikely((desc->len < dev->vhost_hlen)) || + (desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = gpa_to_vva(dev, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + if (virtio_net_with_host_offload(dev)) { + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); + rte_prefetch0(hdr); + } + + /* + * A virtio driver normally uses at least 2 desc buffers + * for Tx: the first for storing the header, and others + * for storing the data. + */ + if (likely((desc->len == dev->vhost_hlen) && + (desc->flags & VRING_DESC_F_NEXT) != 0)) { + desc = &descs[desc->next]; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = gpa_to_vva(dev, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + desc_offset = 0; + desc_avail = desc->len; + nr_desc += 1; + } else { + desc_avail = desc->len - dev->vhost_hlen; + desc_offset = dev->vhost_hlen; + } + + rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); + + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0); + + mbuf_offset = 0; + mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; + while (1) { + uint64_t hpa; + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + + /* + * A desc buf might across two host physical pages that are + * not continuous. In such case (gpa_to_hpa returns 0), data + * will be copied even though zero copy is enabled. 
+ */ + if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, + desc->addr + desc_offset, cpy_len)))) { + cur->data_len = cpy_len; + cur->data_off = 0; + cur->buf_addr = (void *)(uintptr_t)desc_addr; + cur->buf_physaddr = hpa; + + /* + * In zero copy mode, one mbuf can only reference data + * for one or partial of one desc buff. + */ + mbuf_avail = cpy_len; + } else { + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, + mbuf_offset), + (void *)((uintptr_t)(desc_addr + desc_offset)), + cpy_len); + } + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + + /* This desc reaches to its end, get the next one */ + if (desc_avail == 0) { + if ((desc->flags & VRING_DESC_F_NEXT) == 0) + break; + + if (unlikely(desc->next >= max_desc || + ++nr_desc > max_desc)) + return -1; + desc = &descs[desc->next]; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = gpa_to_vva(dev, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + rte_prefetch0((void *)(uintptr_t)desc_addr); + + desc_offset = 0; + desc_avail = desc->len; + + PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); + } + + /* + * This mbuf reaches to its end, get a new one + * to hold more data. + */ + if (mbuf_avail == 0) { + cur = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(cur == NULL)) { + RTE_LOG(ERR, VHOST_DATA, "Failed to " + "allocate memory for mbuf.\n"); + return -1; + } + + prev->next = cur; + prev->data_len = mbuf_offset; + m->nb_segs += 1; + m->pkt_len += mbuf_offset; + prev = cur; + + mbuf_offset = 0; + mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; + } + } + + prev->data_len = mbuf_offset; + m->pkt_len += mbuf_offset; + + if (hdr) + vhost_dequeue_offload(hdr, m); + + return 0; +} + +static inline void __attribute__((always_inline)) +update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t used_idx, uint32_t desc_idx) +{ + vq->used->ring[used_idx].id = desc_idx; + vq->used->ring[used_idx].len = 0; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); +} + +static inline void __attribute__((always_inline)) +update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t count) +{ + if (unlikely(count == 0)) + return; + + rte_smp_wmb(); + rte_smp_rmb(); + + vq->used->idx += count; + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); + + /* Kick guest if required. 
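+	 * The guest may suppress this notification by setting
+	 * VRING_AVAIL_F_NO_INTERRUPT in avail->flags.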
*/ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) + && (vq->callfd >= 0)) + eventfd_write(vq->callfd, (eventfd_t)1); +} + +static inline struct zcopy_mbuf *__attribute__((always_inline)) +get_zmbuf(struct vhost_virtqueue *vq) +{ + uint16_t i; + uint16_t last; + int tries = 0; + + /* search [last_zmbuf_idx, zmbuf_size) */ + i = vq->last_zmbuf_idx; + last = vq->zmbuf_size; + +again: + for (; i < last; i++) { + if (vq->zmbufs[i].in_use == 0) { + vq->last_zmbuf_idx = i + 1; + vq->zmbufs[i].in_use = 1; + return &vq->zmbufs[i]; + } + } + + tries++; + if (tries == 1) { + /* search [0, last_zmbuf_idx) */ + i = 0; + last = vq->last_zmbuf_idx; + goto again; + } + + return NULL; +} + +static inline bool __attribute__((always_inline)) +mbuf_is_consumed(struct rte_mbuf *m) +{ + while (m) { + if (rte_mbuf_refcnt_read(m) > 1) + return false; + m = m->next; + } + + return true; +} + +uint16_t +rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +{ + struct virtio_net *dev; + struct rte_mbuf *rarp_mbuf = NULL; + struct vhost_virtqueue *vq; + uint32_t desc_indexes[MAX_PKT_BURST]; + uint32_t used_idx; + uint32_t i = 0; + uint16_t free_entries; + uint16_t avail_idx; + + dev = get_device(vid); + if (!dev) + return 0; + + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + + if (unlikely(dev->dequeue_zero_copy)) { + struct zcopy_mbuf *zmbuf, *next; + int nr_updated = 0; + + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); + + if (mbuf_is_consumed(zmbuf->mbuf)) { + used_idx = vq->last_used_idx++ & (vq->size - 1); + update_used_ring(dev, vq, used_idx, + zmbuf->desc_idx); + nr_updated += 1; + + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + rte_pktmbuf_free(zmbuf->mbuf); + put_zmbuf(zmbuf); + vq->nr_zmbuf -= 1; + } + } + + update_used_idx(dev, vq, nr_updated); + } + + /* + * Construct a RARP broadcast packet, and inject it to the "pkts" + * array, to looks like that guest actually send such packet. + * + * Check user_send_rarp() for more information. + */ + if (unlikely(rte_atomic16_cmpset((volatile uint16_t *) + &dev->broadcast_rarp.cnt, 1, 0))) { + rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool); + if (rarp_mbuf == NULL) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + return 0; + } + + if (make_rarp_packet(rarp_mbuf, &dev->mac)) { + rte_pktmbuf_free(rarp_mbuf); + rarp_mbuf = NULL; + } else { + count -= 1; + } + } + + free_entries = *((volatile uint16_t *)&vq->avail->idx) - + vq->last_avail_idx; + if (free_entries == 0) + goto out; + + LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + + /* Prefetch available and used ring */ + avail_idx = vq->last_avail_idx & (vq->size - 1); + used_idx = vq->last_used_idx & (vq->size - 1); + rte_prefetch0(&vq->avail->ring[avail_idx]); + rte_prefetch0(&vq->used->ring[used_idx]); + + count = RTE_MIN(count, MAX_PKT_BURST); + count = RTE_MIN(count, free_entries); + LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + dev->vid, count); + + /* Retrieve all of the head indexes first to avoid caching issues. 
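+	 * In the non zero-copy case the used ring entries are updated in the
+	 * same pass; with zero copy they are updated only once the mbuf has
+	 * been consumed.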
*/ + for (i = 0; i < count; i++) { + avail_idx = (vq->last_avail_idx + i) & (vq->size - 1); + used_idx = (vq->last_used_idx + i) & (vq->size - 1); + desc_indexes[i] = vq->avail->ring[avail_idx]; + + if (likely(dev->dequeue_zero_copy == 0)) + update_used_ring(dev, vq, used_idx, desc_indexes[i]); + } + + /* Prefetch descriptor index. */ + rte_prefetch0(&vq->desc[desc_indexes[0]]); + for (i = 0; i < count; i++) { + struct vring_desc *desc; + uint16_t sz, idx; + int err; + + if (likely(i + 1 < count)) + rte_prefetch0(&vq->desc[desc_indexes[i + 1]]); + + if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) { + desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev, + vq->desc[desc_indexes[i]].addr); + if (unlikely(!desc)) + break; + + rte_prefetch0(desc); + sz = vq->desc[desc_indexes[i]].len / sizeof(*desc); + idx = 0; + } else { + desc = vq->desc; + sz = vq->size; + idx = desc_indexes[i]; + } + + pkts[i] = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(pkts[i] == NULL)) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + break; + } + + err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool); + if (unlikely(err)) { + rte_pktmbuf_free(pkts[i]); + break; + } + + if (unlikely(dev->dequeue_zero_copy)) { + struct zcopy_mbuf *zmbuf; + + zmbuf = get_zmbuf(vq); + if (!zmbuf) { + rte_pktmbuf_free(pkts[i]); + break; + } + zmbuf->mbuf = pkts[i]; + zmbuf->desc_idx = desc_indexes[i]; + + /* + * Pin lock the mbuf; we will check later to see + * whether the mbuf is freed (when we are the last + * user) or not. If that's the case, we then could + * update the used ring safely. + */ + rte_mbuf_refcnt_update(pkts[i], 1); + + vq->nr_zmbuf += 1; + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); + } + } + vq->last_avail_idx += i; + + if (likely(dev->dequeue_zero_copy == 0)) { + vq->last_used_idx += i; + update_used_idx(dev, vq, i); + } + +out: + if (unlikely(rarp_mbuf != NULL)) { + /* + * Inject it to the head of "pkts" array, so that switch's mac + * learning table will get updated first. + */ + memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *)); + pkts[0] = rarp_mbuf; + i += 1; + } + + return i; +} diff --git a/lib/vhost/task.c b/lib/vhost/task.c new file mode 100644 index 000000000..c9a27c6b2 --- /dev/null +++ b/lib/vhost/task.c @@ -0,0 +1,162 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include + +#include "spdk_internal/log.h" +#include "spdk_internal/event.h" +#include "spdk/env.h" +#include "spdk/queue.h" +#include "task.h" + +#undef container_of +#define container_of(ptr, type, member) ({ \ + typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) + +typedef TAILQ_HEAD(, spdk_vhost_task) need_iovecs_tailq_t; + +static struct rte_mempool *g_task_pool; +static struct rte_mempool *g_iov_buffer_pool; + +need_iovecs_tailq_t g_need_iovecs[RTE_MAX_LCORE]; + +void +spdk_vhost_task_put(struct spdk_vhost_task *task) +{ + assert(&task->scsi.iov == task->scsi.iovs); + assert(task->scsi.iovcnt == 1); + spdk_scsi_task_put(&task->scsi); +} + +static void +spdk_vhost_task_free_cb(struct spdk_scsi_task *scsi_task) +{ + struct spdk_vhost_task *task = container_of(scsi_task, struct spdk_vhost_task, scsi); + + rte_mempool_put(g_task_pool, task); +} + +struct spdk_vhost_task * +spdk_vhost_task_get(uint32_t *owner_task_ctr) +{ + struct spdk_vhost_task *task; + int rc; + + rc = rte_mempool_get(g_task_pool, (void **)&task); + if ((rc < 0) || !task) { + SPDK_ERRLOG("Unable to get task\n"); + rte_panic("no memory\n"); + } + + memset(task, 0, sizeof(*task)); + spdk_scsi_task_construct(&task->scsi, owner_task_ctr, NULL); + task->scsi.free_fn = spdk_vhost_task_free_cb; + + return task; +} + +void +spdk_vhost_enqueue_task(struct spdk_vhost_task *task) +{ + need_iovecs_tailq_t *tailq = &g_need_iovecs[rte_lcore_id()]; + + TAILQ_INSERT_TAIL(tailq, task, iovecs_link); +} + +struct spdk_vhost_task * +spdk_vhost_dequeue_task(void) +{ + need_iovecs_tailq_t *tailq = &g_need_iovecs[rte_lcore_id()]; + struct spdk_vhost_task *task; + + if (TAILQ_EMPTY(tailq)) + return NULL; + + task = TAILQ_FIRST(tailq); + TAILQ_REMOVE(tailq, task, iovecs_link); + + return task; +} + +struct iovec * +spdk_vhost_iovec_alloc(void) +{ + struct iovec *iov = NULL; + + rte_mempool_get(g_iov_buffer_pool, (void **)&iov); + return iov; +} + +void +spdk_vhost_iovec_free(struct iovec *iov) +{ + rte_mempool_put(g_iov_buffer_pool, iov); +} + +static int +spdk_vhost_subsystem_init(void) +{ + g_task_pool = rte_mempool_create("vhost task pool", 16384, sizeof(struct spdk_vhost_task), + 128, 0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0); + if (!g_task_pool) { + SPDK_ERRLOG("create task pool failed\n"); + return -1; + } + + g_iov_buffer_pool = rte_mempool_create("vhost iov buffer pool", 2048, + VHOST_SCSI_IOVS_LEN * sizeof(struct iovec), + 128, 0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0); + if (!g_iov_buffer_pool) { + SPDK_ERRLOG("create iov buffer pool failed\n"); + return -1; + } + + for (int i = 0; i < RTE_MAX_LCORE; i++) { + TAILQ_INIT(&g_need_iovecs[i]); + } + + return 0; +} + +static int +spdk_vhost_subsystem_fini(void) +{ + return 0; +} + +SPDK_SUBSYSTEM_REGISTER(vhost, spdk_vhost_subsystem_init, spdk_vhost_subsystem_fini, NULL) +SPDK_SUBSYSTEM_DEPEND(vhost, scsi) diff --git a/lib/vhost/task.h b/lib/vhost/task.h new file mode 
100644 index 000000000..c60d867de --- /dev/null +++ b/lib/vhost/task.h @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_VHOST_TASK_H +#define SPDK_VHOST_TASK_H + +#include "spdk/scsi.h" + +/* Allocated iovec buffer len */ +#define VHOST_SCSI_IOVS_LEN 128 + +struct spdk_vhost_task { + struct spdk_scsi_task scsi; + + union { + struct virtio_scsi_cmd_resp *resp; + struct virtio_scsi_ctrl_tmf_resp *tmf_resp; + }; + + struct spdk_vhost_scsi_ctrlr *vdev; + struct spdk_scsi_dev *scsi_dev; + + int req_idx; + + struct vhost_virtqueue *vq; + + TAILQ_ENTRY(spdk_vhost_task) iovecs_link; +}; + +void spdk_vhost_enqueue_task(struct spdk_vhost_task *task); +struct spdk_vhost_task *spdk_vhost_dequeue_task(void); + +void spdk_vhost_task_put(struct spdk_vhost_task *task); +struct spdk_vhost_task *spdk_vhost_task_get(uint32_t *owner_task_ctr); + +void spdk_vhost_iovec_free(struct iovec *iov); +struct iovec *spdk_vhost_iovec_alloc(void); + +#endif /* SPDK_VHOST_TASK_H */ diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c new file mode 100644 index 000000000..a8d7ea7a9 --- /dev/null +++ b/lib/vhost/vhost.c @@ -0,0 +1,1161 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "spdk_internal/log.h" +#include "spdk/env.h" +#include "spdk/scsi.h" +#include "spdk/conf.h" +#include "spdk/event.h" +#include "spdk/scsi_spec.h" + +#include "spdk/vhost.h" +#include "task.h" + +static uint32_t g_num_ctrlrs[RTE_MAX_LCORE]; + +#define CONTROLQ_POLL_PERIOD_US (1000 * 5) + +#define VIRTIO_SCSI_CONTROLQ 0 +#define VIRTIO_SCSI_EVENTQ 1 +#define VIRTIO_SCSI_REQUESTQ 2 + +/* Path to folder where character device will be created. Can be set by user. */ +static char dev_dirname[PATH_MAX] = ""; + +struct spdk_vaddr_region { + void *vaddr; + uint64_t len; +}; + +/* + * Device linked list structure for data path. + */ +struct spdk_vhost_scsi_ctrlr { + char *name; + /**< Pointer to device created by vhost lib. */ + struct virtio_net *dev; + + struct spdk_vaddr_region region[VHOST_MEMORY_MAX_NREGIONS]; + uint32_t nregions; + + /**< TODO make this an array of spdk_scsi_devs. The vhost scsi + * request will tell us which scsi_dev to use. + */ + struct spdk_scsi_dev *scsi_dev[SPDK_VHOST_SCSI_CTRLR_MAX_DEVS]; + + int task_cnt; + + struct spdk_poller *requestq_poller; + struct spdk_poller *controlq_poller; + + int32_t lcore; + + uint64_t cpumask; +} __rte_cache_aligned; + +/* This maps from the integer index passed by DPDK to the our controller representation. */ +struct spdk_vhost_scsi_ctrlr *dpdk_vid_mapping[MAX_VHOST_DEVICE]; /* MAX_VHOST_DEVICE from DPDK. */ + +/* + * Get available requests from avail ring. + */ +static uint16_t +vq_avail_ring_get(struct vhost_virtqueue *vq, uint16_t *reqs, uint16_t reqs_len) +{ + struct vring_avail *avail = vq->avail; + uint16_t size_mask = vq->size - 1; + uint16_t last_idx = vq->last_avail_idx, avail_idx = avail->idx; + uint16_t count = RTE_MIN((avail_idx - last_idx) & size_mask, reqs_len); + uint16_t i; + + vq->last_avail_idx += count; + for (i = 0; i < count; i++) { + reqs[i] = vq->avail->ring[(last_idx + i) & size_mask]; + } + + SPDK_TRACELOG(SPDK_TRACE_VHOST_RING, + "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n", + last_idx, avail_idx, count); + + return count; +} + +/* + * Enqueue id and len to used ring. 
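+ * The used element is written first, a compiler barrier keeps the
+ * used->idx update from being reordered ahead of it, and the guest is
+ * then kicked through the callfd eventfd.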
+ */ +static void +vq_used_ring_enqueue(struct vhost_virtqueue *vq, uint16_t id, uint32_t len) +{ + struct vring_used *used = vq->used; + uint16_t size_mask = vq->size - 1; + uint16_t last_idx = vq->last_used_idx; + + SPDK_TRACELOG(SPDK_TRACE_VHOST_RING, "USED: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n", + last_idx, id, len); + + vq->last_used_idx++; + last_idx &= size_mask; + + used->ring[last_idx].id = id; + used->ring[last_idx].len = len; + + rte_compiler_barrier(); + + vq->used->idx = vq->last_used_idx; + eventfd_write(vq->callfd, (eventfd_t)1); +} + +static bool +vring_desc_has_next(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_NEXT); +} + +static struct vring_desc * +vring_desc_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc) +{ + assert(vring_desc_has_next(cur_desc)); + return &vq_desc[cur_desc->next]; +} + +static bool +vring_desc_is_wr(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_WRITE); +} + +static void task_submit(struct spdk_vhost_task *task); +static int process_request(struct spdk_vhost_task *task); +static void invalid_request(struct spdk_vhost_task *task); + +static void +submit_completion(struct spdk_vhost_task *task) +{ + struct iovec *iovs = NULL; + int result; + + vq_used_ring_enqueue(task->vq, task->req_idx, task->scsi.data_transferred); + SPDK_TRACELOG(SPDK_TRACE_VHOST, "Finished task (%p) req_idx=%d\n", task, task->req_idx); + + if (task->scsi.iovs != &task->scsi.iov) { + iovs = task->scsi.iovs; + task->scsi.iovs = &task->scsi.iov; + task->scsi.iovcnt = 1; + } + + spdk_vhost_task_put(task); + + if (!iovs) { + return; + } + + while (1) { + task = spdk_vhost_dequeue_task(); + if (!task) { + spdk_vhost_iovec_free(iovs); + break; + } + + /* Set iovs so underlying functions will not try to alloc IOV */ + task->scsi.iovs = iovs; + task->scsi.iovcnt = VHOST_SCSI_IOVS_LEN; + + result = process_request(task); + if (result == 0) { + task_submit(task); + break; + } else { + task->scsi.iovs = &task->scsi.iov; + task->scsi.iovcnt = 1; + invalid_request(task); + } + } +} + +static void +process_mgmt_task_completion(void *arg1, void *arg2) +{ + struct spdk_vhost_task *task = arg1; + + submit_completion(task); +} + +static void +process_task_completion(void *arg1, void *arg2) +{ + struct spdk_vhost_task *task = arg1; + + /* The SCSI task has completed. Do final processing and then post + notification to the virtqueue's "used" ring. + */ + task->resp->status = task->scsi.status; + + if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) { + memcpy(task->resp->sense, task->scsi.sense_data, task->scsi.sense_data_len); + task->resp->sense_len = task->scsi.sense_data_len; + } + task->resp->resid = task->scsi.transfer_len - task->scsi.data_transferred; + + submit_completion(task); +} + +static void +task_submit(struct spdk_vhost_task *task) +{ + /* The task is ready to be submitted. First create the callback event that + will be invoked when the SCSI command is completed. See process_task_completion() + for what SPDK vhost-scsi does when the task is completed. 
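+	   The event is allocated on the current lcore, so completion processing
+	   runs on the same core that polls this controller's virtqueues.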
+ */ + + task->resp->response = VIRTIO_SCSI_S_OK; + task->scsi.cb_event = spdk_event_allocate(rte_lcore_id(), + process_task_completion, + task, NULL); + spdk_scsi_dev_queue_task(task->scsi_dev, &task->scsi); +} + +static void +mgmt_task_submit(struct spdk_vhost_task *task) +{ + task->tmf_resp->response = VIRTIO_SCSI_S_OK; + task->scsi.cb_event = spdk_event_allocate(rte_lcore_id(), + process_mgmt_task_completion, + task, NULL); + spdk_scsi_dev_queue_mgmt_task(task->scsi_dev, &task->scsi); +} + +static void +invalid_request(struct spdk_vhost_task *task) +{ + vq_used_ring_enqueue(task->vq, task->req_idx, 0); + spdk_vhost_task_put(task); + + SPDK_TRACELOG(SPDK_TRACE_VHOST, "Invalid request (status=%" PRIu8")\n", + task->resp ? task->resp->response : -1); +} + +static struct spdk_scsi_dev * +get_scsi_dev(struct spdk_vhost_scsi_ctrlr *vdev, const __u8 *lun) +{ + SPDK_TRACEDUMP(SPDK_TRACE_VHOST_QUEUE, "LUN", lun, 8); + /* First byte must be 1 and second is target */ + if (lun[0] != 1 || lun[1] >= SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) + return NULL; + + return vdev->scsi_dev[lun[1]]; +} + +static struct spdk_scsi_lun * +get_scsi_lun(struct spdk_scsi_dev *scsi_dev, const __u8 *lun) +{ + uint16_t lun_id = (((uint16_t)lun[2] << 8) | lun[3]) & 0x3FFF; + + /* For now only one LUN per controller is allowed so no need to search LUN IDs*/ + return likely(scsi_dev != NULL && lun_id < scsi_dev->maxlun) ? scsi_dev->lun[lun_id] : NULL; +} + +static void +process_ctrl_request(struct spdk_vhost_scsi_ctrlr *vdev, struct vhost_virtqueue *controlq, + uint16_t req_idx) +{ + struct spdk_vhost_task *task; + + struct vring_desc *desc; + struct virtio_scsi_ctrl_tmf_req *ctrl_req; + struct virtio_scsi_ctrl_an_resp *an_resp; + + desc = &controlq->desc[req_idx]; + ctrl_req = (void *)gpa_to_vva(vdev->dev, desc->addr); + + SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, + "Processing controlq descriptor: desc %d/%p, desc_addr %p, len %d, flags %d, last_used_idx %d; enabled %d; kickfd %d; size %d\n", + req_idx, desc, (void *)desc->addr, desc->len, desc->flags, controlq->last_used_idx, + controlq->enabled, controlq->kickfd, controlq->size); + SPDK_TRACEDUMP(SPDK_TRACE_VHOST_QUEUE, "Request desriptor", (uint8_t *)ctrl_req, + desc->len); + + task = spdk_vhost_task_get(&vdev->task_cnt); + task->vq = controlq; + task->vdev = vdev; + task->req_idx = req_idx; + task->scsi_dev = get_scsi_dev(task->vdev, ctrl_req->lun); + + /* Process the TMF request */ + switch (ctrl_req->type) { + case VIRTIO_SCSI_T_TMF: + /* Get the response buffer */ + assert(vring_desc_has_next(desc)); + desc = vring_desc_get_next(controlq->desc, desc); + task->tmf_resp = (void *)gpa_to_vva(vdev->dev, desc->addr); + + /* Check if we are processing a valid request */ + if (task->scsi_dev == NULL) { + task->tmf_resp->response = VIRTIO_SCSI_S_BAD_TARGET; + break; + } + + switch (ctrl_req->subtype) { + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: + /* Handle LUN reset */ + SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, "LUN reset\n"); + task->scsi.type = SPDK_SCSI_TASK_TYPE_MANAGE; + task->scsi.function = SPDK_SCSI_TASK_FUNC_LUN_RESET; + task->scsi.lun = get_scsi_lun(task->scsi_dev, ctrl_req->lun); + + mgmt_task_submit(task); + return; + default: + task->tmf_resp->response = VIRTIO_SCSI_S_ABORTED; + /* Unsupported command */ + SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, "Unsupported TMF command %x\n", ctrl_req->subtype); + break; + } + break; + case VIRTIO_SCSI_T_AN_QUERY: + case VIRTIO_SCSI_T_AN_SUBSCRIBE: { + desc = vring_desc_get_next(controlq->desc, desc); + an_resp = (void 
*)gpa_to_vva(vdev->dev, desc->addr); + an_resp->response = VIRTIO_SCSI_S_ABORTED; + break; + } + default: + SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, "Unsupported control command %x\n", ctrl_req->type); + break; + } + + vq_used_ring_enqueue(controlq, req_idx, 0); + spdk_vhost_task_put(task); +} + +/* + * Process task's descriptor chain and setup data related fields. + * Return + * -1 if request is invalid and must be aborted, + * 0 if all data are set, + * 1 if it was not possible to allocate IO vector for this task. + */ +static int +task_data_setup(struct spdk_vhost_task *task, + struct virtio_scsi_cmd_req **req) +{ + struct vhost_virtqueue *vq = task->vq; + struct virtio_net *dev = task->vdev->dev; + struct vring_desc *desc = &task->vq->desc[task->req_idx]; + struct iovec *iovs = task->scsi.iovs; + uint16_t iovcnt = 0, iovcnt_max = task->scsi.iovcnt; + uint32_t len = 0; + + assert(iovcnt_max == 1 || iovcnt_max == VHOST_SCSI_IOVS_LEN); + + /* Sanity check. First descriptor must be readable and must have next one. */ + if (unlikely(vring_desc_is_wr(desc) || !vring_desc_has_next(desc))) { + SPDK_WARNLOG("Invalid first (request) descriptor.\n"); + task->resp = NULL; + goto abort_task; + } + + *req = (void *)gpa_to_vva(dev, desc->addr); + + desc = vring_desc_get_next(vq->desc, desc); + task->scsi.dxfer_dir = vring_desc_is_wr(desc) ? SPDK_SCSI_DIR_FROM_DEV : SPDK_SCSI_DIR_TO_DEV; + + if (task->scsi.dxfer_dir == SPDK_SCSI_DIR_FROM_DEV) { + /* + * FROM_DEV (READ): [RD_req][WR_resp][WR_buf0]...[WR_bufN] + */ + task->resp = (void *)gpa_to_vva(dev, desc->addr); + if (!vring_desc_has_next(desc)) { + /* + * TEST UNIT READY command and some others might not contain any payload and this is not an error. + */ + SPDK_TRACELOG(SPDK_TRACE_VHOST_DATA, + "No payload descriptors for FROM DEV command req_idx=%"PRIu16".\n", task->req_idx); + SPDK_TRACEDUMP(SPDK_TRACE_VHOST_DATA, "CDB=", (*req)->cdb, VIRTIO_SCSI_CDB_SIZE); + task->scsi.iovcnt = 1; + task->scsi.iovs[0].iov_len = 0; + task->scsi.length = 0; + task->scsi.transfer_len = 0; + return 0; + } + + desc = vring_desc_get_next(vq->desc, desc); + if (iovcnt_max != VHOST_SCSI_IOVS_LEN && vring_desc_has_next(desc)) { + iovs = spdk_vhost_iovec_alloc(); + if (iovs == NULL) { + return 1; + } + + iovcnt_max = VHOST_SCSI_IOVS_LEN; + } + + /* All remaining descriptors are data. */ + while (iovcnt < iovcnt_max) { + iovs[iovcnt].iov_base = (void *)gpa_to_vva(dev, desc->addr); + iovs[iovcnt].iov_len = desc->len; + len += desc->len; + iovcnt++; + + if (!vring_desc_has_next(desc)) + break; + + desc = vring_desc_get_next(vq->desc, desc); + if (unlikely(!vring_desc_is_wr(desc))) { + SPDK_WARNLOG("FROM DEV cmd: descriptor nr %" PRIu16" in payload chain is read only.\n", iovcnt); + task->resp = NULL; + goto abort_task; + } + } + } else { + SPDK_TRACELOG(SPDK_TRACE_VHOST_DATA, "TO DEV"); + /* + * TO_DEV (WRITE):[RD_req][RD_buf0]...[RD_bufN][WR_resp] + * No need to check descriptor WR flag as this is done while setting scsi.dxfer_dir. + */ + + if (iovcnt_max != VHOST_SCSI_IOVS_LEN && vring_desc_has_next(desc)) { + /* If next descriptor is not for response, allocate iovs. */ + if (!vring_desc_is_wr(vring_desc_get_next(vq->desc, desc))) { + iovs = spdk_vhost_iovec_alloc(); + + if (iovs == NULL) { + return 1; + } + + iovcnt_max = VHOST_SCSI_IOVS_LEN; + } + } + + /* Process descriptors up to response. 
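+	 * Every readable descriptor is collected into the iovec list; the first
+	 * write-only descriptor terminates the data and holds the response.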
*/ + while (!vring_desc_is_wr(desc) && iovcnt < iovcnt_max) { + iovs[iovcnt].iov_base = (void *)gpa_to_vva(dev, desc->addr); + iovs[iovcnt].iov_len = desc->len; + len += desc->len; + iovcnt++; + + if (!vring_desc_has_next(desc)) { + SPDK_WARNLOG("TO_DEV cmd: no response descriptor.\n"); + task->resp = NULL; + goto abort_task; + } + + desc = vring_desc_get_next(vq->desc, desc); + } + + task->resp = (void *)gpa_to_vva(dev, desc->addr); + if (vring_desc_has_next(desc)) { + SPDK_WARNLOG("TO_DEV cmd: ignoring unexpected descriptors after response descriptor.\n"); + } + } + + if (iovcnt_max > 1 && iovcnt == iovcnt_max) { + SPDK_WARNLOG("Too many IO vectors in chain!\n"); + goto abort_task; + } + + task->scsi.iovs = iovs; + task->scsi.iovcnt = iovcnt; + task->scsi.length = len; + task->scsi.transfer_len = len; + return 0; + +abort_task: + if (iovs != task->scsi.iovs) { + spdk_vhost_iovec_free(iovs); + } + + if (task->resp) { + task->resp->response = VIRTIO_SCSI_S_ABORTED; + } + + return -1; +} + +static int +process_request(struct spdk_vhost_task *task) +{ + struct virtio_scsi_cmd_req *req; + int result; + + result = task_data_setup(task, &req); + if (result) { + return result; + } + + task->scsi_dev = get_scsi_dev(task->vdev, req->lun); + if (unlikely(task->scsi_dev == NULL)) { + task->resp->response = VIRTIO_SCSI_S_BAD_TARGET; + return -1; + } + + task->scsi.lun = get_scsi_lun(task->scsi_dev, req->lun); + task->scsi.cdb = req->cdb; + task->scsi.target_port = spdk_scsi_dev_find_port_by_id(task->scsi_dev, 0); + SPDK_TRACEDUMP(SPDK_TRACE_VHOST_DATA, "request CDB", req->cdb, VIRTIO_SCSI_CDB_SIZE); + return 0; +} + +static void +process_controlq(struct spdk_vhost_scsi_ctrlr *vdev, struct vhost_virtqueue *vq) +{ + uint16_t reqs[32]; + uint16_t reqs_cnt, i; + + reqs_cnt = vq_avail_ring_get(vq, reqs, RTE_DIM(reqs)); + for (i = 0; i < reqs_cnt; i++) { + process_ctrl_request(vdev, vq, reqs[i]); + } +} + +static void +process_requestq(struct spdk_vhost_scsi_ctrlr *vdev, struct vhost_virtqueue *vq) +{ + uint16_t reqs[32]; + uint16_t reqs_cnt, i; + struct spdk_vhost_task *task; + int result; + + reqs_cnt = vq_avail_ring_get(vq, reqs, RTE_DIM(reqs)); + for (i = 0; i < reqs_cnt; i++) { + task = spdk_vhost_task_get(&vdev->task_cnt); + + SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Starting processing request idx %"PRIu16"======\n", + reqs[i]); + task->vq = vq; + task->vdev = vdev; + task->req_idx = reqs[i]; + result = process_request(task); + if (likely(result == 0)) { + task_submit(task); + SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Task %p req_idx %d submitted ======\n", task, + task->req_idx); + } else if (result > 0) { + spdk_vhost_enqueue_task(task); + SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Task %p req_idx %d deferred ======\n", task, task->req_idx); + } else { + invalid_request(task); + SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Task %p req_idx %d failed ======\n", task, task->req_idx); + } + } +} + +static void +vdev_controlq_worker(void *arg) +{ + struct spdk_vhost_scsi_ctrlr *vdev = arg; + + process_controlq(vdev, vdev->dev->virtqueue[VIRTIO_SCSI_CONTROLQ]); +} + +static void +vdev_worker(void *arg) +{ + struct spdk_vhost_scsi_ctrlr *vdev = arg; + uint32_t q_idx; + + for (q_idx = VIRTIO_SCSI_REQUESTQ; q_idx < vdev->dev->num_queues; q_idx++) { + process_requestq(vdev, vdev->dev->virtqueue[q_idx]); + } +} + +#define SHIFT_2MB 21 +#define SIZE_2MB (1ULL << SHIFT_2MB) +#define FLOOR_2MB(x) (((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB +#define CEIL_2MB(x) ((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB + 
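+/*
+ * The 2 MB helpers above are used by add_vdev_cb() to align each guest
+ * memory region to hugepage boundaries before it is registered with
+ * spdk_vtophys_register() for vtophys translation.
+ */
+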
+static void +vdev_event_done_cb(void *arg1, void *arg2) +{ + sem_post((sem_t *)arg2); +} + +static struct spdk_event * +vhost_sem_event_alloc(uint32_t core, spdk_event_fn fn, void *arg1, sem_t *sem) +{ + if (sem_init(sem, 0, 0) < 0) + rte_panic("Failed to initialize semaphore."); + + return spdk_event_allocate(core, fn, arg1, sem); +} + +static int +vhost_sem_timedwait(sem_t *sem, unsigned sec) +{ + struct timespec timeout; + int rc; + + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_sec += sec; + + rc = sem_timedwait(sem, &timeout); + sem_destroy(sem); + + return rc; +} + +static void +add_vdev_cb(void *arg1, void *arg2) +{ + struct spdk_vhost_scsi_ctrlr *vdev = arg1; + struct virtio_memory_region *region; + uint32_t i; + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { + if (vdev->scsi_dev[i] == NULL) { + continue; + } + spdk_scsi_dev_allocate_io_channels(vdev->scsi_dev[i]); + } + SPDK_NOTICELOG("Started poller for vhost controller %s on lcore %d\n", vdev->name, vdev->lcore); + vdev->nregions = vdev->dev->mem->nregions; + for (i = 0; i < vdev->nregions; i++) { + uint64_t start, end, len; + region = &vdev->dev->mem->regions[i]; + start = FLOOR_2MB(region->mmap_addr); + end = CEIL_2MB(region->mmap_addr + region->mmap_size); + len = end - start; + vdev->region[i].vaddr = (void *)start; + vdev->region[i].len = len; + SPDK_NOTICELOG("Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n", + start, len); + spdk_vtophys_register(vdev->region[i].vaddr, vdev->region[i].len); + } + + spdk_poller_register(&vdev->requestq_poller, vdev_worker, vdev, vdev->lcore, 0); + spdk_poller_register(&vdev->controlq_poller, vdev_controlq_worker, vdev, vdev->lcore, + CONTROLQ_POLL_PERIOD_US); + sem_post((sem_t *)arg2); +} + +static void +remove_vdev_cb(void *arg1, void *arg2) +{ + struct spdk_vhost_scsi_ctrlr *vdev = arg1; + uint32_t i; + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { + if (vdev->scsi_dev[i] == NULL) { + continue; + } + spdk_scsi_dev_free_io_channels(vdev->scsi_dev[i]); + } + + SPDK_NOTICELOG("Stopping poller for vhost controller %s\n", vdev->name); + for (i = 0; i < vdev->nregions; i++) { + spdk_vtophys_unregister(vdev->region[i].vaddr, vdev->region[i].len); + } + + vdev->nregions = 0; + + sem_post((sem_t *)arg2); +} + +static void +destroy_device(int vid) +{ + struct spdk_vhost_scsi_ctrlr *vdev = dpdk_vid_mapping[vid]; + struct spdk_event *event; + sem_t done_sem; + uint32_t i; + + event = vhost_sem_event_alloc(vdev->lcore, vdev_event_done_cb, NULL, &done_sem); + spdk_poller_unregister(&vdev->requestq_poller, event); + if (vhost_sem_timedwait(&done_sem, 1)) + rte_panic("%s: failed to unregister request queue poller.\n", vdev->name); + + event = vhost_sem_event_alloc(vdev->lcore, vdev_event_done_cb, NULL, &done_sem); + spdk_poller_unregister(&vdev->controlq_poller, event); + if (vhost_sem_timedwait(&done_sem, 1)) + rte_panic("%s: failed to unregister control queue poller.\n", vdev->name); + + /* Wait for all tasks to finish */ + for (i = 1000; i && vdev->task_cnt > 0; i--) { + usleep(1000); + } + + if (vdev->task_cnt > 0) { + rte_panic("%s: pending tasks did not finish in 1s.\n", vdev->name); + } + + event = vhost_sem_event_alloc(vdev->lcore, remove_vdev_cb, vdev, &done_sem); + spdk_event_call(event); + if (vhost_sem_timedwait(&done_sem, 1)) + rte_panic("%s: failed to unregister poller.\n", vdev->name); + + g_num_ctrlrs[vdev->lcore]--; + vdev->lcore = -1; + vdev->dev = NULL; + dpdk_vid_mapping[vid] = NULL; +} + +#define LUN_DEV_NAME_SIZE 8 +#define 
MAX_SCSI_CTRLRS 15 + +static struct spdk_vhost_scsi_ctrlr *spdk_vhost_ctrlrs[MAX_SCSI_CTRLRS]; + +static struct spdk_vhost_scsi_ctrlr * +spdk_vhost_scsi_ctrlr_find(const char *ctrlr_name) +{ + unsigned i; + size_t dev_dirname_len = strlen(dev_dirname); + + if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) { + ctrlr_name += dev_dirname_len; + } + + for (i = 0; i < MAX_SCSI_CTRLRS; i++) { + if (spdk_vhost_ctrlrs[i] == NULL) { + continue; + } + + if (strcmp(spdk_vhost_ctrlrs[i]->name, ctrlr_name) == 0) { + return spdk_vhost_ctrlrs[i]; + } + } + + return NULL; +} + +int +spdk_vhost_scsi_ctrlr_construct(const char *name, uint64_t cpumask) +{ + struct spdk_vhost_scsi_ctrlr *vdev; + unsigned ctrlr_num; + char path[PATH_MAX]; + + if (name == NULL) { + SPDK_ERRLOG("Can't add controller with no name\n"); + return -EINVAL; + } + + if ((cpumask & spdk_app_get_core_mask()) != cpumask) { + SPDK_ERRLOG("cpumask 0x%jx not a subset of app mask 0x%jx\n", + cpumask, spdk_app_get_core_mask()); + return -EINVAL; + } + + if (spdk_vhost_scsi_ctrlr_find(name)) { + SPDK_ERRLOG("vhost scsi controller %s already exists.\n", name); + return -EEXIST; + } + + for (ctrlr_num = 0; ctrlr_num < MAX_SCSI_CTRLRS; ctrlr_num++) { + if (spdk_vhost_ctrlrs[ctrlr_num] == NULL) { + break; + } + } + + if (ctrlr_num == MAX_SCSI_CTRLRS) { + SPDK_ERRLOG("Max scsi controllers reached (%d).\n", MAX_SCSI_CTRLRS); + return -ENOSPC; + } + + vdev = rte_zmalloc(NULL, sizeof(*vdev), RTE_CACHE_LINE_SIZE); + if (vdev == NULL) { + SPDK_ERRLOG("Couldn't allocate memory for vhost dev\n"); + return -ENOMEM; + } + + snprintf(path, sizeof(path), "%s%s", dev_dirname, name); + /* Register vhost(cuse or user) driver to handle vhost messages. */ + if (access(path, F_OK) != -1) { + if (unlink(path) != 0) + rte_exit(EXIT_FAILURE, "Cannot remove %s.\n", path); + } + + if (rte_vhost_driver_register(path, 0) != 0) { + SPDK_ERRLOG("Could not register controller %s with vhost library\n", name); + SPDK_ERRLOG("Check if domain socket %s already exists\n", path); + return -EIO; + } + + spdk_vhost_ctrlrs[ctrlr_num] = vdev; + vdev->name = strdup(name); + vdev->cpumask = cpumask; + vdev->lcore = -1; + SPDK_NOTICELOG("Controller %s: new controller added\n", name); + return 0; +} + +int +spdk_vhost_parse_core_mask(const char *mask, uint64_t *cpumask) +{ + char *end; + + if (mask == NULL || cpumask == NULL) { + return -1; + } + + errno = 0; + *cpumask = strtoull(mask, &end, 16); + + if (*end != '\0' || errno || !*cpumask || + ((*cpumask & spdk_app_get_core_mask()) != *cpumask)) { + + SPDK_ERRLOG("cpumask %s not a subset of app mask 0x%jx\n", + mask, spdk_app_get_core_mask()); + return -1; + } + + return 0; +} + +struct spdk_scsi_dev * +spdk_vhost_scsi_ctrlr_get_dev(struct spdk_vhost_scsi_ctrlr *ctrlr, uint8_t num) +{ + assert(ctrlr != NULL); + assert(num < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS); + return ctrlr->scsi_dev[num]; +} + +int +spdk_vhost_scsi_ctrlr_add_dev(const char *ctrlr_name, unsigned scsi_dev_num, const char *lun_name) +{ + struct spdk_vhost_scsi_ctrlr *vdev; + char dev_name[SPDK_SCSI_DEV_MAX_NAME]; + int lun_id_list[1]; + char *lun_names_list[1]; + + if (ctrlr_name == NULL) { + SPDK_ERRLOG("No controller name\n"); + return -EINVAL; + } + + if (scsi_dev_num > SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) { + SPDK_ERRLOG("Controller %d device num too big (max %d)\n", scsi_dev_num, + SPDK_VHOST_SCSI_CTRLR_MAX_DEVS); + return -EINVAL; + } + + if (lun_name == NULL) { + SPDK_ERRLOG("No lun name specified \n"); + return -EINVAL; + } + + vdev = 
spdk_vhost_scsi_ctrlr_find(ctrlr_name); + if (vdev == NULL) { + SPDK_ERRLOG("Controller %s is not defined\n", ctrlr_name); + return -ENODEV; + } + + if (vdev->lcore != -1) { + SPDK_ERRLOG("Controller %s is in use and hotplug is not supported\n", ctrlr_name); + return -ENODEV; + } + + if (vdev->scsi_dev[scsi_dev_num] != NULL) { + SPDK_ERRLOG("Controller %s dev %u already occupied\n", ctrlr_name, scsi_dev_num); + return -EEXIST; + } + + /* + * At this stage only one LUN per device + */ + snprintf(dev_name, sizeof(dev_name), "Dev%u", scsi_dev_num); + lun_id_list[0] = 0; + lun_names_list[0] = (char *)lun_name; + + vdev->scsi_dev[scsi_dev_num] = spdk_scsi_dev_construct(dev_name, lun_names_list, lun_id_list, 1); + if (vdev->scsi_dev[scsi_dev_num] == NULL) { + SPDK_ERRLOG("Couldn't create spdk SCSI device '%s' using lun device '%s' in controller: %s\n", + dev_name, lun_name, vdev->name); + return -EINVAL; + } + + spdk_scsi_dev_add_port(vdev->scsi_dev[scsi_dev_num], 0, "vhost"); + SPDK_NOTICELOG("Controller %s: defined device '%s' using lun '%s'\n", + vdev->name, dev_name, lun_name); + return 0; +} + +struct spdk_vhost_scsi_ctrlr * +spdk_vhost_scsi_ctrlr_next(struct spdk_vhost_scsi_ctrlr *prev) +{ + int i = 0; + + if (prev != NULL) { + for (; i < MAX_SCSI_CTRLRS; i++) { + if (spdk_vhost_ctrlrs[i] == prev) { + break; + } + } + + i++; + } + + for (; i < MAX_SCSI_CTRLRS; i++) { + if (spdk_vhost_ctrlrs[i] == NULL) { + continue; + } + + return spdk_vhost_ctrlrs[i]; + } + + return NULL; +} + +const char * +spdk_vhost_scsi_ctrlr_get_name(struct spdk_vhost_scsi_ctrlr *ctrlr) +{ + assert(ctrlr != NULL); + return ctrlr->name; +} + +uint64_t +spdk_vhost_scsi_ctrlr_get_cpumask(struct spdk_vhost_scsi_ctrlr *ctrlr) +{ + assert(ctrlr != NULL); + return ctrlr->cpumask; +} + +static int spdk_vhost_scsi_controller_construct(void) +{ + struct spdk_conf_section *sp = spdk_conf_first_section(NULL); + int i; + unsigned ctrlr_num = 0; + char *lun_name, dev_name[LUN_DEV_NAME_SIZE]; + char *cpumask_str; + char *name; + uint64_t cpumask; + + while (sp != NULL) { + if (!spdk_conf_section_match_prefix(sp, "VhostScsi")) { + sp = spdk_conf_next_section(sp); + continue; + } + + if (sscanf(spdk_conf_section_get_name(sp), "VhostScsi%u", &ctrlr_num) != 1) { + SPDK_WARNLOG("Ignoring section that don't match VhostScsi controller template: %s\n", + spdk_conf_section_get_name(sp)); + continue; + } + + name = spdk_conf_section_get_val(sp, "Name"); + cpumask_str = spdk_conf_section_get_val(sp, "Cpumask"); + if (cpumask_str == NULL) { + cpumask = spdk_app_get_core_mask(); + } else if (spdk_vhost_parse_core_mask(cpumask_str, &cpumask)) { + SPDK_ERRLOG("Error parsing cpumask while creating controller\n"); + return -1; + } + + if (spdk_vhost_scsi_ctrlr_construct(name, cpumask) < 0) { + return -1; + } + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { + snprintf(dev_name, sizeof(dev_name), "Dev%d", i); + lun_name = spdk_conf_section_get_val(sp, dev_name); + if (lun_name == NULL) { + continue; + } + + if (spdk_vhost_scsi_ctrlr_add_dev(name, i, lun_name) < 0) { + return -1; + } + } + + sp = spdk_conf_next_section(sp); + + } + + return 0; +} + +static uint32_t +spdk_vhost_scsi_allocate_reactor(uint64_t cpumask) +{ + uint32_t i, selected_core; + uint32_t min_ctrlrs; + + cpumask &= spdk_app_get_core_mask(); + + if (cpumask == 0) { + return 0; + } + + min_ctrlrs = INT_MAX; + selected_core = 0; + + for (i = 0; i < RTE_MAX_LCORE && i < 64; i++) { + if (!((1ULL << i) & cpumask)) { + continue; + } + + if (g_num_ctrlrs[i] < min_ctrlrs) { + 
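+			/* Remember the allowed core with the fewest controllers so far. */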
selected_core = i; + min_ctrlrs = g_num_ctrlrs[i]; + } + } + + g_num_ctrlrs[selected_core]++; + return selected_core; +} + +/* + * A new device is added to a data core. First the device is added to the main linked list + * and then allocated to a specific data core. + */ +static int +new_device(int vid) +{ + struct virtio_net *dev = vhost_devices[vid]; + struct spdk_vhost_scsi_ctrlr *vdev = NULL; + struct spdk_event *event; + sem_t added; + uint32_t i; + + vdev = spdk_vhost_scsi_ctrlr_find(dev->ifname); + if (vdev == NULL) { + SPDK_ERRLOG("Controller %s not found.\n", dev->ifname); + return -1; + } + + if (vdev->lcore != -1) { + SPDK_ERRLOG("Controller %s already connected.\n", dev->ifname); + return -1; + } + + dpdk_vid_mapping[vid] = vdev; + vdev->dev = dev; + + /* Disable notifications. */ + for (i = 0; i < dev->num_queues; i++) { + rte_vhost_enable_guest_notification(vid, i, 0); + } + + dev->flags |= VIRTIO_DEV_RUNNING; + vdev->dev = dev; + + vdev->lcore = spdk_vhost_scsi_allocate_reactor(vdev->cpumask); + + event = vhost_sem_event_alloc(vdev->lcore, add_vdev_cb, vdev, &added); + spdk_event_call(event); + if (vhost_sem_timedwait(&added, 1)) + rte_panic("Failed to register new device '%s'\n", vdev->name); + return 0; +} + +/* + * These callback allow devices to be added to the data core when configuration + * has been fully complete. + */ +static const struct virtio_net_device_ops virtio_net_device_ops = { + .new_device = new_device, + .destroy_device = destroy_device, +}; + +static void * +session_start(void *arg) +{ + rte_vhost_driver_session_start(); + return NULL; +} + +void +spdk_vhost_startup(void *arg1, void *arg2) +{ + int ret; + pthread_t tid; + const char *basename = arg1; + + if (basename) { + ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename); + if ((size_t)ret >= sizeof(dev_dirname) - 2) + rte_exit(EXIT_FAILURE, "Char dev dir path length %d is too long\n", ret); + + if (dev_dirname[ret - 1] != '/') { + dev_dirname[ret] = '/'; + dev_dirname[ret + 1] = '\0'; + } + } + + ret = spdk_vhost_scsi_controller_construct(); + if (ret != 0) + rte_exit(EXIT_FAILURE, "Cannot construct vhost controllers\n"); + + rte_vhost_driver_callback_register(&virtio_net_device_ops); + + if (pthread_create(&tid, NULL, &session_start, NULL) < 0) + rte_panic("Failed to start session poller thread (%d): %s", errno, strerror(errno)); + pthread_detach(tid); +} + +static void * +session_shutdown(void *arg) +{ + struct spdk_vhost_scsi_ctrlr *vdev = NULL; + int i; + + for (i = 0; i < MAX_SCSI_CTRLRS; i++) { + vdev = spdk_vhost_ctrlrs[i]; + if (vdev == NULL) { + continue; + } + rte_vhost_driver_unregister(vdev->name); + } + + SPDK_NOTICELOG("Exiting\n"); + spdk_app_stop(0); + return NULL; +} + +/* + * When we receive a INT signal. Execute shutdown in separate thread to avoid deadlock. 
diff --git a/lib/vhost/vhost_rpc.c b/lib/vhost/vhost_rpc.c
new file mode 100644
index 000000000..493b19be2
--- /dev/null
+++ b/lib/vhost/vhost_rpc.c
@@ -0,0 +1,215 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include + +#include "spdk_internal/log.h" +#include "spdk/rpc.h" + +#include "spdk/vhost.h" +#include "task.h" + +static void +json_scsi_dev_write(struct spdk_json_write_ctx *ctx, struct spdk_scsi_dev *dev) +{ + int l; + + spdk_json_write_name(ctx, "id"); + spdk_json_write_int32(ctx, (int32_t)dev->id); + + spdk_json_write_name(ctx, "device_name"); + spdk_json_write_string(ctx, dev->name); + + spdk_json_write_name(ctx, "luns"); + spdk_json_write_array_begin(ctx); + for (l = 0; l < dev->maxlun; l++) { + if (NULL == dev->lun[l]) + continue; + + spdk_json_write_object_begin(ctx); + + spdk_json_write_name(ctx, "id"); + spdk_json_write_int32(ctx, (int32_t)dev->lun[l]->id); + + spdk_json_write_name(ctx, "name"); + spdk_json_write_string(ctx, dev->lun[l]->name); + + spdk_json_write_object_end(ctx); + } + spdk_json_write_array_end(ctx); +} + +static void +spdk_rpc_get_vhost_scsi_controllers(struct spdk_jsonrpc_server_conn *conn, + const struct spdk_json_val *params, + const struct spdk_json_val *id) +{ + struct spdk_json_write_ctx *w; + struct spdk_vhost_scsi_ctrlr *ctrlr = NULL; + struct spdk_scsi_dev *dev; + uint32_t i; + char buf[32]; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_vhost_scsi_controllers requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(conn, id); + spdk_json_write_array_begin(w); + while ((ctrlr = spdk_vhost_scsi_ctrlr_next(ctrlr)) != NULL) { + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "ctrlr"); + spdk_json_write_string(w, spdk_vhost_scsi_ctrlr_get_name(ctrlr)); + + spdk_json_write_name(w, "cpu_mask"); + snprintf(buf, sizeof(buf), "%#" PRIx64, spdk_vhost_scsi_ctrlr_get_cpumask(ctrlr)); + spdk_json_write_string(w, buf); + + spdk_json_write_name(w, "scsi_devs"); + spdk_json_write_array_begin(w); + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { + dev = spdk_vhost_scsi_ctrlr_get_dev(ctrlr, i); + if (!dev) + continue; + + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "scsi_dev_num"); + spdk_json_write_uint32(w, i); + json_scsi_dev_write(w, dev); + spdk_json_write_object_end(w); + } + + spdk_json_write_array_end(w); // devs + + spdk_json_write_object_end(w); // ctrl + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(conn, w); + return; +} +SPDK_RPC_REGISTER("get_vhost_scsi_controllers", spdk_rpc_get_vhost_scsi_controllers) + +struct rpc_vhost_scsi_ctrlr { + char *ctrlr; + char *cpumask; +}; + +static const struct spdk_json_object_decoder rpc_construct_vhost_ctrlr[] = { + {"ctrlr", offsetof(struct rpc_vhost_scsi_ctrlr, ctrlr), spdk_json_decode_string }, + {"cpumask", offsetof(struct rpc_vhost_scsi_ctrlr, cpumask), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_construct_vhost_scsi_controller(struct spdk_jsonrpc_server_conn *conn, + const struct spdk_json_val *params, + const struct spdk_json_val *id) +{ + struct rpc_vhost_scsi_ctrlr req = {0}; + struct spdk_json_write_ctx *w; + int rc; + uint64_t cpumask; + + if (spdk_json_decode_object(params, rpc_construct_vhost_ctrlr, + sizeof(rpc_construct_vhost_ctrlr) / sizeof(*rpc_construct_vhost_ctrlr), + &req)) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + cpumask = spdk_app_get_core_mask(); + if (req.cpumask != NULL && spdk_vhost_parse_core_mask(req.cpumask, &cpumask)) { + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vhost_scsi_ctrlr_construct(req.ctrlr, cpumask); + if (rc < 0) { + goto invalid; + } + + w = 
spdk_jsonrpc_begin_result(conn, id); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(conn, w); + return; +invalid: + spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS, strerror(-rc)); +} +SPDK_RPC_REGISTER("construct_vhost_scsi_controller", spdk_rpc_construct_vhost_scsi_controller) + +struct rpc_add_vhost_scsi_ctrlr_lun { + char *ctrlr; + uint32_t scsi_dev_num; + char *lun_name; +}; + +static const struct spdk_json_object_decoder rpc_vhost_add_lun[] = { + {"ctrlr", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, ctrlr), spdk_json_decode_string }, + {"scsi_dev_num", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, scsi_dev_num), spdk_json_decode_uint32}, + {"lun_name", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, lun_name), spdk_json_decode_string }, +}; + +static void +spdk_rpc_add_vhost_scsi_lun(struct spdk_jsonrpc_server_conn *conn, + const struct spdk_json_val *params, + const struct spdk_json_val *id) +{ + struct rpc_add_vhost_scsi_ctrlr_lun req = {0}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_vhost_add_lun, + sizeof(rpc_vhost_add_lun) / sizeof(*rpc_vhost_add_lun), + &req)) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vhost_scsi_ctrlr_add_dev(req.ctrlr, req.scsi_dev_num, req.lun_name); + if (rc < 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(conn, id); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(conn, w); + return; +invalid: + spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS, strerror(-rc)); +} +SPDK_RPC_REGISTER("add_vhost_scsi_lun", spdk_rpc_add_vhost_scsi_lun) diff --git a/mk/spdk.app.mk b/mk/spdk.app.mk index 30eeb9e17..5ae15f80d 100644 --- a/mk/spdk.app.mk +++ b/mk/spdk.app.mk @@ -35,12 +35,13 @@ # separately and wrapped in whole-archive linker args SPDK_RPC_LIB_LIST = $(filter %_rpc,$(SPDK_LIB_LIST)) -# Currently the iscsi, net, and scsi libraries contain their respective RPC methods +# Currently some libraries contain their respective RPC methods # rather than breaking them out into separate libraries. So we must also include # these directories in the RPC library list. SPDK_RPC_LIB_LIST += $(filter iscsi,$(SPDK_LIB_LIST)) SPDK_RPC_LIB_LIST += $(filter net,$(SPDK_LIB_LIST)) SPDK_RPC_LIB_LIST += $(filter scsi,$(SPDK_LIB_LIST)) +SPDK_RPC_LIB_LIST += $(filter vhost,$(SPDK_LIB_LIST)) SPDK_REMAINING_LIB_LIST = $(filter-out $(SPDK_RPC_LIB_LIST),$(SPDK_LIB_LIST)) diff --git a/scripts/check_format.sh b/scripts/check_format.sh index 689e7382e..a81239ed8 100755 --- a/scripts/check_format.sh +++ b/scripts/check_format.sh @@ -12,9 +12,13 @@ if hash astyle; then echo -n "Checking coding style..." rm -f astyle.log touch astyle.log - astyle --options=.astylerc "*.c" >> astyle.log + # Exclude rte_vhost code imported from DPDK - we want to keep the original code + # as-is to enable ongoing work to synch with a generic upstream DPDK vhost library, + # rather than making diffs more complicated by a lot of changes to follow SPDK + # coding standards. 
+ astyle --options=.astylerc "*.c" --exclude="rte_vhost" >> astyle.log astyle --options=.astylerc --exclude=test/cpp_headers "*.cpp" >> astyle.log - astyle --options=.astylerc "*.h" >> astyle.log + astyle --options=.astylerc "*.h" --exclude="rte_vhost" >> astyle.log if grep -q "^Formatted" astyle.log; then echo " errors detected" git diff diff --git a/scripts/rpc.py b/scripts/rpc.py index fd97f14a7..7c0055904 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -417,15 +417,16 @@ p = subparsers.add_parser('get_vhost_scsi_controllers', help='List vhost control p.set_defaults(func=get_vhost_scsi_controllers) def construct_vhost_scsi_controller(args): - params = { - 'ctrlr': args.ctrlr, - 'cpumask': args.cpu_mask - } + params = {'ctrlr': args.ctrlr} + + if args.cpumask: + params['cpumask'] = args.cpumask + jsonrpc_call('construct_vhost_scsi_controller', params) p = subparsers.add_parser('construct_vhost_scsi_controller', help='Add new vhost controller') -p.add_argument('ctrlr', help='conntroller name') -p.add_argument('cpumask', help='cpu mask for this controller') +p.add_argument('ctrlr', help='controller name') +p.add_argument('--cpumask', help='cpu mask for this controller') p.set_defaults(func=construct_vhost_scsi_controller) def add_vhost_scsi_lun(args): diff --git a/test/vhost/ext4test/ext4connect.sh b/test/vhost/ext4test/ext4connect.sh new file mode 100755 index 000000000..6c0980d47 --- /dev/null +++ b/test/vhost/ext4test/ext4connect.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$testdir/../../.. +source $rootdir/scripts/autotest_common.sh + +script='shopt -s nullglob; \ + for entry in /sys/block/sd*; do \ + disk_type="$(cat $entry/device/vendor)"; \ + if [[ $disk_type == Intel* ]] || [[ $disk_type == RAWSCSI* ]] || [[ $disk_type == LIO-ORG* ]]; then \ + fname=$(basename $entry); \ + echo -n "$fname "; \ + fi; \ + done' + +devs="$(echo "$script" | bash -s)" + +timing_enter ext4test + +trap "exit 1" SIGINT SIGTERM EXIT + +for dev in $devs; do + mkfs.ext4 -F /dev/$dev + mkdir -p /mnt/${dev}dir + mount -o sync /dev/$dev /mnt/${dev}dir + rsync -qav --exclude=".git" $rootdir/ /mnt/${dev}dir/spdk + sleep 2 + make -C /mnt/${dev}dir/spdk -j8 clean + make -C /mnt/${dev}dir/spdk -j8 + + # Print out space consumed on target device to help decide + # if/when we need to increase the size of the malloc LUN + df -h /dev/$dev + rm -rf /mnt/${dev}dir/spdk +done + +for dev in $devs; do + umount /mnt/${dev}dir + rm -rf /mnt/${dev}dir + + stats=( $(cat /sys/block/$dev/stat) ) + echo "" + echo "$dev stats" + printf "READ IO cnt: % 8u merges: % 8u sectors: % 8u ticks: % 8u\n" \ + ${stats[0]} ${stats[1]} ${stats[2]} ${stats[3]} + printf "WRITE IO cnt: % 8u merges: % 8u sectors: % 8u ticks: % 8u\n" \ + ${stats[4]} ${stats[5]} ${stats[6]} ${stats[7]} + printf "in flight: % 8u io ticks: % 8u time in queue: % 8u\n" \ + ${stats[8]} ${stats[9]} ${stats[10]} + echo "" +done + +trap - SIGINT SIGTERM EXIT + +timing_exit ext4test diff --git a/test/vhost/ext4test/ext4start.sh b/test/vhost/ext4test/ext4start.sh new file mode 100755 index 000000000..283df48e9 --- /dev/null +++ b/test/vhost/ext4test/ext4start.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$testdir/../../.. 
+source $rootdir/scripts/autotest_common.sh + +if [ -z "$VM_IMG" ]; then + echo "VM_IMG: path to qcow2 image not provided - not running" + exit 1 +fi + +if [ -z "$VM_QEMU" ]; then + echo "VM_QEMU: path to qemu binary not provided - not running" + exit 1 +fi + +HOST_IP=192.168.122.1 +VM_IP=192.168.122.254 +VM_UNAME="root" +VM_PASS="root" +VM_NAME="ext4test_vm" +VM_NET_NAME="test_net" +VM_MAC="02:de:ad:de:ad:01" +VM_BAK_IMG="/tmp/ext4test_backing.img" +TIMEO=60 +SSHCMD="sshpass -p $VM_PASS ssh" +SCPCMD="sshpass -p $VM_PASS scp" + +function cleanup_virsh() { + virsh destroy $VM_NAME + virsh net-destroy $VM_NET_NAME + rm $VM_BAK_IMG +} + +timing_enter ext4test + +qemu-img create -f qcow2 -o backing_file=$VM_IMG $VM_BAK_IMG + +cp $testdir/spdk_vm_base.xml $testdir/spdk_vm.xml +cp $testdir/spdk_vnet_base.xml $testdir/spdk_vnet.xml + +sed -i "s@@$VM_NAME@g" $testdir/spdk_vm.xml +sed -i "s@source file=''@source file='$VM_BAK_IMG'@g" $testdir/spdk_vm.xml +sed -i "s@@$VM_QEMU@g" $testdir/spdk_vm.xml +sed -i "s@@$VM_NET_NAME@g" $testdir/spdk_vnet.xml + +trap "cleanup_virsh; killprocess $pid; exit 1" SIGINT SIGTERM EXIT + +virsh net-create $testdir/spdk_vnet.xml + +# Change directory and ownership because virsh has issues with +# paths that are in /root tree +cd /tmp +$rootdir/app/vhost/vhost -c $testdir/vhost.conf & +pid=$! +echo "Process pid: $pid" +sleep 10 +chmod 777 /tmp/naa.123 + +tar --exclude '.git' --exclude 'spdk.tgz' --exclude '*.d' --exclude '*.o' -zcf /tmp/spdk_host.tgz $rootdir + +virsh create $testdir/spdk_vm.xml +virsh net-update $VM_NET_NAME add ip-dhcp-host "" + +# Wait for VM to boot, disable trap temporarily +# so that we don't exit on first fail +echo "Trying to connect to virtual machine..." +trap - SIGINT SIGTERM EXIT +set +xe +rc=-1 +while [[ $TIMEO -gt 0 && rc -ne 0 ]]; do + $SSHCMD root@$VM_IP -q -oStrictHostKeyChecking=no 'echo Hello' + rc=$? + ((TIMEO-=1)) +done +set -xe +trap "cleanup_virsh; killprocess $pid; exit 1" SIGINT SIGTERM EXIT + +if [[ $TIMEO -eq 0 || rc -ne 0 ]]; then + echo "VM did not boot properly, exiting" + exit 1 +fi + +$SSHCMD root@$VM_IP 'mkdir -p /tmp/spdk' +$SCPCMD -r /tmp/spdk_host.tgz root@$VM_IP:/tmp/spdk +$SSHCMD root@$VM_IP 'cd /tmp/spdk; tar xf spdk_host.tgz' +$SSHCMD root@$VM_IP '/tmp/spdk/test/vhost/ext4test/ext4connect.sh' + +#read -p "Hit enter to exit..." + +trap - SIGINT SIGTERM EXIT + +cleanup_virsh +rm $testdir/spdk_vm.xml +rm $testdir/spdk_vnet.xml +killprocess $pid +timing_exit ext4test diff --git a/test/vhost/ext4test/spdk_vm_base.xml b/test/vhost/ext4test/spdk_vm_base.xml new file mode 100644 index 000000000..4df40a3a0 --- /dev/null +++ b/test/vhost/ext4test/spdk_vm_base.xml @@ -0,0 +1,69 @@ + + + + 2 + 2 + 4 + + hvm + + + + + + + + + + + + destroy + restart + destroy + + + + + + + +
[remainder of test/vhost/ext4test/spdk_vm_base.xml: libvirt domain device definitions; element markup not preserved in this copy]