From bab34df6cafbca66ccb01dba42a2944e08afcc15 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Fri, 28 Jul 2017 08:36:53 -0700 Subject: [PATCH] vhost: split iovs which span hugepage boundaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A virtio descriptor may specify a buffer that spans a huge page boundary or even a vhost memory region. So modify spdk_vhost_vring_desc_to_iov() to take this into account. While here, also change spdk_vhost_vring_desc_to_iov to return int instead of bool. int return with -1 is most standard to indicate failure. Signed-off-by: Jim Harris Change-Id: I71faefce0367dc9e44d70ea4429a28dd64f04c10 Reviewed-on: https://review.gerrithub.io/371756 Tested-by: SPDK Automated Test System Reviewed-by: Daniel Verkamp Reviewed-by: Piotr Pelpliński Reviewed-by: Ben Walker Reviewed-by: Dariusz Stojaczyk --- lib/vhost/vhost.c | 33 ++++++- lib/vhost/vhost_blk.c | 5 +- lib/vhost/vhost_internal.h | 4 +- lib/vhost/vhost_scsi.c | 12 ++- test/unit/lib/vhost/vhost.c/vhost_ut.c | 123 ++++++++++++++++++++++++- 5 files changed, 160 insertions(+), 17 deletions(-) diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index bd6f6d32c..e82c4fe9b 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -143,13 +143,36 @@ spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc) return !!(cur_desc->flags & VRING_DESC_F_WRITE); } -bool +#define _2MB_OFFSET(ptr) ((ptr) & (0x200000 - 1)) + +int spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov, - const struct vring_desc *desc) + uint16_t *iov_index, const struct vring_desc *desc) { - iov->iov_base = spdk_vhost_gpa_to_vva(vdev, desc->addr); - iov->iov_len = desc->len; - return !iov->iov_base; + uint32_t remaining = desc->len; + uint32_t len; + uintptr_t payload = desc->addr; + uintptr_t vva; + + while (remaining) { + if (*iov_index >= SPDK_VHOST_IOVS_MAX) { + SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX); + return -1; + } + vva = (uintptr_t)spdk_vhost_gpa_to_vva(vdev, payload); + if (vva == 0) { + SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload); + return -1; + } + len = spdk_min(remaining, 0x200000 - _2MB_OFFSET(payload)); + iov[*iov_index].iov_base = (void *)vva; + iov[*iov_index].iov_len = len; + remaining -= len; + payload += len; + (*iov_index)++; + } + + return 0; } struct spdk_vhost_dev * diff --git a/lib/vhost/vhost_blk.c b/lib/vhost/vhost_blk.c index 01749c7d0..a46103672 100644 --- a/lib/vhost/vhost_blk.c +++ b/lib/vhost/vhost_blk.c @@ -137,14 +137,13 @@ blk_iovs_setup(struct spdk_vhost_dev *vdev, struct rte_vhost_vring *vq, uint16_t return -1; } - if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, &iovs[cnt], desc))) { + if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &cnt, desc))) { SPDK_TRACELOG(SPDK_TRACE_VHOST_BLK, "Invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", req_idx, cnt); return -1; } - len += iovs[cnt].iov_len; - cnt++; + len += desc->len; out_cnt += spdk_vhost_vring_desc_is_wr(desc); diff --git a/lib/vhost/vhost_internal.h b/lib/vhost/vhost_internal.h index 3bc999ab2..1cb4473ba 100644 --- a/lib/vhost/vhost_internal.h +++ b/lib/vhost/vhost_internal.h @@ -109,8 +109,8 @@ struct vring_desc *spdk_vhost_vring_desc_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc); bool spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc); -bool spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov, - const struct vring_desc *desc); +int spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov, + uint16_t *iov_index, const struct vring_desc *desc); struct spdk_vhost_dev *spdk_vhost_dev_find_by_vid(int vid); diff --git a/lib/vhost/vhost_scsi.c b/lib/vhost/vhost_scsi.c index 22b196de5..989f29a61 100644 --- a/lib/vhost/vhost_scsi.c +++ b/lib/vhost/vhost_scsi.c @@ -516,9 +516,11 @@ task_data_setup(struct spdk_vhost_scsi_task *task, /* All remaining descriptors are data. */ while (iovcnt < iovcnt_max) { - spdk_vhost_vring_desc_to_iov(vdev, &iovs[iovcnt], desc); + if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &iovcnt, desc))) { + task->resp = NULL; + goto abort_task; + } len += desc->len; - iovcnt++; if (!spdk_vhost_vring_desc_has_next(desc)) break; @@ -539,9 +541,11 @@ task_data_setup(struct spdk_vhost_scsi_task *task, /* Process descriptors up to response. */ while (!spdk_vhost_vring_desc_is_wr(desc) && iovcnt < iovcnt_max) { - spdk_vhost_vring_desc_to_iov(vdev, &iovs[iovcnt], desc); + if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &iovcnt, desc))) { + task->resp = NULL; + goto abort_task; + } len += desc->len; - iovcnt++; if (!spdk_vhost_vring_desc_has_next(desc)) { SPDK_WARNLOG("TO_DEV cmd: no response descriptor.\n"); diff --git a/test/unit/lib/vhost/vhost.c/vhost_ut.c b/test/unit/lib/vhost/vhost.c/vhost_ut.c index 86ca55282..0fb56b663 100644 --- a/test/unit/lib/vhost/vhost.c/vhost_ut.c +++ b/test/unit/lib/vhost/vhost.c/vhost_ut.c @@ -74,9 +74,126 @@ test_setup(void) return 0; } -static void -null_test(void) +static struct spdk_vhost_dev * +alloc_vdev(void) { + struct spdk_vhost_dev *vdev = NULL; + int rc; + + /* spdk_vhost_dev must be allocated on a cache line boundary. */ + rc = posix_memalign((void **)&vdev, 64, sizeof(*vdev)); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(vdev != NULL); + + vdev->mem = calloc(1, sizeof(*vdev->mem) + 2 * sizeof(struct rte_vhost_mem_region)); + SPDK_CU_ASSERT_FATAL(vdev->mem != NULL); + vdev->mem->nregions = 2; + vdev->mem->regions[0].guest_phys_addr = 0; + vdev->mem->regions[0].size = 0x400000; /* 4 MB */ + vdev->mem->regions[0].host_user_addr = 0x1000000; + vdev->mem->regions[1].guest_phys_addr = 0x400000; + vdev->mem->regions[1].size = 0x400000; /* 4 MB */ + vdev->mem->regions[1].host_user_addr = 0x2000000; + + return vdev; +} + +static void +free_vdev(struct spdk_vhost_dev *vdev) +{ + free(vdev->mem); + free(vdev); +} + +static void +desc_to_iov_test(void) +{ + struct spdk_vhost_dev *vdev; + struct iovec iov[SPDK_VHOST_IOVS_MAX]; + uint16_t iov_index; + struct vring_desc desc; + int rc; + + vdev = alloc_vdev(); + + /* Test simple case where iov falls fully within a 2MB page. */ + desc.addr = 0x110000; + desc.len = 0x1000; + iov_index = 0; + rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); + CU_ASSERT(rc == 0); + CU_ASSERT(iov_index == 1); + CU_ASSERT(iov[0].iov_base == (void *)0x1110000); + CU_ASSERT(iov[0].iov_len == 0x1000); + /* + * Always memset the iov to ensure each test validates data written by its call + * to the function under test. + */ + memset(iov, 0, sizeof(iov)); + + /* Same test, but ensure it respects the non-zero starting iov_index. */ + iov_index = SPDK_VHOST_IOVS_MAX - 1; + rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); + CU_ASSERT(rc == 0); + CU_ASSERT(iov_index == SPDK_VHOST_IOVS_MAX); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x1110000); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x1000); + memset(iov, 0, sizeof(iov)); + + /* Test for failure if iov_index already equals SPDK_VHOST_IOVS_MAX. */ + iov_index = SPDK_VHOST_IOVS_MAX; + rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); + CU_ASSERT(rc != 0); + memset(iov, 0, sizeof(iov)); + + /* Test case where iov spans a 2MB boundary, but does not span a vhost memory region. */ + desc.addr = 0x1F0000; + desc.len = 0x20000; + iov_index = 0; + rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); + CU_ASSERT(rc == 0); + CU_ASSERT(iov_index == 2); + CU_ASSERT(iov[0].iov_base == (void *)0x11F0000); + CU_ASSERT(iov[0].iov_len == 0x10000); + CU_ASSERT(iov[1].iov_base == (void *)0x1200000); + CU_ASSERT(iov[1].iov_len == 0x10000); + memset(iov, 0, sizeof(iov)); + + /* Same test, but ensure it respects the non-zero starting iov_index. */ + iov_index = SPDK_VHOST_IOVS_MAX - 2; + rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); + CU_ASSERT(rc == 0); + CU_ASSERT(iov_index == SPDK_VHOST_IOVS_MAX); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_base == (void *)0x11F0000); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_len == 0x10000); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x1200000); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x10000); + memset(iov, 0, sizeof(iov)); + + /* + * This test should fail. The first part of the descriptor will fit in the last + * iov, but the part after the 2MB boundary would overflow. + */ + iov_index = SPDK_VHOST_IOVS_MAX - 1; + rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); + CU_ASSERT(rc != 0); + memset(iov, 0, sizeof(iov)); + + /* Test case where iov spans a vhost memory region. */ + desc.addr = 0x3F0000; + desc.len = 0x20000; + iov_index = 0; + rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); + CU_ASSERT(rc == 0); + CU_ASSERT(iov_index == 2); + CU_ASSERT(iov[0].iov_base == (void *)0x13F0000); + CU_ASSERT(iov[0].iov_len == 0x10000); + CU_ASSERT(iov[1].iov_base == (void *)0x2000000); + CU_ASSERT(iov[1].iov_len == 0x10000); + memset(iov, 0, sizeof(iov)); + + free_vdev(vdev); + CU_ASSERT(true); } @@ -97,7 +214,7 @@ main(int argc, char **argv) } if ( - CU_add_test(suite, "null test", null_test) == NULL + CU_add_test(suite, "desc_to_iov", desc_to_iov_test) == NULL ) { CU_cleanup_registry(); return CU_get_error();