vhost: only split on 2MB boundaries when necessary

vhost I/O only needs to be split on a 2MB boundary if
there is a break in the VM's memtable at that 2MB
boundary.

This should drastically reduce (if not eliminate)
the intermittent test pool failures seen recently.
virtio limits the number of segments to 128, but this
2MB splitting could introduce additional segment
breaks that we do not allocate IOVs for.  In almost
all cases, there are no memtable breaks except at
the low 2MB, so most of the extra segment breaks we
are adding are unnecessary.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I12d85c289ad80c7bb65e3d2030a2405092b19deb

Reviewed-on: https://review.gerrithub.io/396058
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Dariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
This commit is contained in:
Jim Harris 2018-01-23 12:54:31 -07:00
parent e489ca69f6
commit f570aa654a
2 changed files with 26 additions and 19 deletions

View File

@ -329,6 +329,7 @@ spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov,
uint16_t *iov_index, const struct vring_desc *desc)
{
uint32_t remaining = desc->len;
uint32_t to_boundary;
uint32_t len;
uintptr_t payload = desc->addr;
uintptr_t vva;
@ -343,7 +344,26 @@ spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov,
SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
return -1;
}
len = spdk_min(remaining, 0x200000 - _2MB_OFFSET(payload));
to_boundary = 0x200000 - _2MB_OFFSET(payload);
if (spdk_likely(remaining <= to_boundary)) {
len = remaining;
} else {
/*
* Descriptor crosses a 2MB hugepage boundary. vhost memory regions are allocated
* from hugepage memory, so this means this descriptor may be described by
* discontiguous vhost memory regions. Do not blindly split on the 2MB boundary,
* only split it if the two sides of the boundary do not map to the same vhost
* memory region. This helps ensure we do not exceed the max number of IOVs
* defined by SPDK_VHOST_IOVS_MAX.
*/
len = to_boundary;
while (len < remaining) {
if (vva + len != (uintptr_t)spdk_vhost_gpa_to_vva(vdev, payload + len)) {
break;
}
len += spdk_min(remaining - len, 0x200000);
}
}
iov[*iov_index].iov_base = (void *)vva;
iov[*iov_index].iov_len = len;
remaining -= len;

View File

@ -164,31 +164,18 @@ desc_to_iov_test(void)
iov_index = 0;
rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
CU_ASSERT(rc == 0);
CU_ASSERT(iov_index == 2);
CU_ASSERT(iov_index == 1);
CU_ASSERT(iov[0].iov_base == (void *)0x11F0000);
CU_ASSERT(iov[0].iov_len == 0x10000);
CU_ASSERT(iov[1].iov_base == (void *)0x1200000);
CU_ASSERT(iov[1].iov_len == 0x10000);
CU_ASSERT(iov[0].iov_len == 0x20000);
memset(iov, 0, sizeof(iov));
/* Same test, but ensure it respects the non-zero starting iov_index. */
iov_index = SPDK_VHOST_IOVS_MAX - 2;
iov_index = SPDK_VHOST_IOVS_MAX - 1;
rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
CU_ASSERT(rc == 0);
CU_ASSERT(iov_index == SPDK_VHOST_IOVS_MAX);
CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_base == (void *)0x11F0000);
CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_len == 0x10000);
CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x1200000);
CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x10000);
memset(iov, 0, sizeof(iov));
/*
* This test should fail. The first part of the descriptor will fit in the last
* iov, but the part after the 2MB boundary would overflow.
*/
iov_index = SPDK_VHOST_IOVS_MAX - 1;
rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
CU_ASSERT(rc != 0);
CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x11F0000);
CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x20000);
memset(iov, 0, sizeof(iov));
/* Test case where iov spans a vhost memory region. */