From f570aa654a79702f80f441c43f0a701eba3990c7 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Tue, 23 Jan 2018 12:54:31 -0700 Subject: [PATCH] vhost: only split on 2MB boundaries when necessary vhost I/O only need to be split on 2MB boundaries if there is a break in the VM's memtable at that 2MB boundary. This should drastically reduce (if not eliminate) the intermittent test pool failures seen recently. virtio limits number of segments to 128, but this 2MB splitting could introduce additional segment breaks which we do not allocate IOVs for. In almost all cases, there are no memtable breaks except at low 2MB, so most of the extra segment breaks we are adding are unnecessary. Signed-off-by: Jim Harris Change-Id: I12d85c289ad80c7bb65e3d2030a2405092b19deb Reviewed-on: https://review.gerrithub.io/396058 Reviewed-by: Daniel Verkamp Tested-by: SPDK Automated Test System Reviewed-by: Changpeng Liu Reviewed-by: Dariusz Stojaczyk --- lib/vhost/vhost.c | 22 +++++++++++++++++++++- test/unit/lib/vhost/vhost.c/vhost_ut.c | 23 +++++------------------ 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index f07f8933a..ef9c46746 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -329,6 +329,7 @@ spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov, uint16_t *iov_index, const struct vring_desc *desc) { uint32_t remaining = desc->len; + uint32_t to_boundary; uint32_t len; uintptr_t payload = desc->addr; uintptr_t vva; @@ -343,7 +344,26 @@ spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov, SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload); return -1; } - len = spdk_min(remaining, 0x200000 - _2MB_OFFSET(payload)); + to_boundary = 0x200000 - _2MB_OFFSET(payload); + if (spdk_likely(remaining <= to_boundary)) { + len = remaining; + } else { + /* + * Descriptor crosses a 2MB hugepage boundary. vhost memory regions are allocated + * from hugepage memory, so this means this descriptor may be described by + * discontiguous vhost memory regions. Do not blindly split on the 2MB boundary, + * only split it if the two sides of the boundary do not map to the same vhost + * memory region. This helps ensure we do not exceed the max number of IOVs + * defined by SPDK_VHOST_IOVS_MAX. + */ + len = to_boundary; + while (len < remaining) { + if (vva + len != (uintptr_t)spdk_vhost_gpa_to_vva(vdev, payload + len)) { + break; + } + len += spdk_min(remaining - len, 0x200000); + } + } iov[*iov_index].iov_base = (void *)vva; iov[*iov_index].iov_len = len; remaining -= len; diff --git a/test/unit/lib/vhost/vhost.c/vhost_ut.c b/test/unit/lib/vhost/vhost.c/vhost_ut.c index eb95ce7c4..19ecf9d76 100644 --- a/test/unit/lib/vhost/vhost.c/vhost_ut.c +++ b/test/unit/lib/vhost/vhost.c/vhost_ut.c @@ -164,31 +164,18 @@ desc_to_iov_test(void) iov_index = 0; rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); CU_ASSERT(rc == 0); - CU_ASSERT(iov_index == 2); + CU_ASSERT(iov_index == 1); CU_ASSERT(iov[0].iov_base == (void *)0x11F0000); - CU_ASSERT(iov[0].iov_len == 0x10000); - CU_ASSERT(iov[1].iov_base == (void *)0x1200000); - CU_ASSERT(iov[1].iov_len == 0x10000); + CU_ASSERT(iov[0].iov_len == 0x20000); memset(iov, 0, sizeof(iov)); /* Same test, but ensure it respects the non-zero starting iov_index. */ - iov_index = SPDK_VHOST_IOVS_MAX - 2; + iov_index = SPDK_VHOST_IOVS_MAX - 1; rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); CU_ASSERT(rc == 0); CU_ASSERT(iov_index == SPDK_VHOST_IOVS_MAX); - CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_base == (void *)0x11F0000); - CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_len == 0x10000); - CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x1200000); - CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x10000); - memset(iov, 0, sizeof(iov)); - - /* - * This test should fail. The first part of the descriptor will fit in the last - * iov, but the part after the 2MB boundary would overflow. - */ - iov_index = SPDK_VHOST_IOVS_MAX - 1; - rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc); - CU_ASSERT(rc != 0); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x11F0000); + CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x20000); memset(iov, 0, sizeof(iov)); /* Test case where iov spans a vhost memory region. */