From 3d1b60557cb0bc787120f5b50c5abafc5870fded Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Mon, 14 Nov 2016 18:17:09 -0700 Subject: [PATCH] env: add spdk_vtophys_register/unregister These APIs can be used to register/unregister regions of pinned, huge page memory that are separate from huge page memory allocated by the default DPDK allocations. These APIs will be used by an upcoming SPDK vhost-scsi target to enable SPDK to target NVMe DMA operations directly to VM memory that has been allocated by QEMU using pinned huge pages. Signed-off-by: Jim Harris Change-Id: I649a4adeeb758b29bd29cd42c8872eed3d5d6ce9 --- include/spdk/env.h | 13 ++++ lib/env_dpdk/vtophys.c | 142 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 151 insertions(+), 4 deletions(-) diff --git a/include/spdk/env.h b/include/spdk/env.h index 7fc921d9b..f25afc7c6 100644 --- a/include/spdk/env.h +++ b/include/spdk/env.h @@ -154,6 +154,19 @@ void spdk_delay_us(unsigned int us); uint64_t spdk_vtophys(void *buf); +/** + * Register the specified memory region for vtophys address translation. + * The memory region must map to pinned huge pages (2MB or greater). + */ +void spdk_vtophys_register(void *vaddr, uint64_t len); + +/** + * Unregister the specified memory region from vtophys address translation. + * The caller must ensure all in-flight DMA operations to this memory region + * are completed or cancelled before calling this function. + */ +void spdk_vtophys_unregister(void *vaddr, uint64_t len); + enum spdk_pci_device_type { SPDK_PCI_DEVICE_NVME, SPDK_PCI_DEVICE_IOAT, diff --git a/lib/env_dpdk/vtophys.c b/lib/env_dpdk/vtophys.c index 2711e81f3..653bbd974 100644 --- a/lib/env_dpdk/vtophys.c +++ b/lib/env_dpdk/vtophys.c @@ -64,9 +64,16 @@ #define MAP_128TB_IDX(vfn_2mb) ((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB)) #define MAP_1GB_IDX(vfn_2mb) ((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB + 1)) - 1)) +/* Max value for a 48-bit PFN. */ +#define INVALID_PFN (0xFFFFFFFFFFFFULL) + +/* Max value for a 16-bit ref count. */ +#define VTOPHYS_MAX_REF_COUNT (0xFFFF) + /* Physical page frame number of a single 2MB page. */ struct map_2mb { - uint64_t pfn_2mb; + uint64_t pfn_2mb : 48; + uint64_t ref_count : 16; }; /* Second-level map table indexed by bits [21..29] of the virtual address. @@ -123,6 +130,25 @@ vtophys_get_map(uint64_t vfn_2mb) return map_2mb; } +static uint64_t +vtophys_get_dpdk_paddr(void *vaddr) +{ + uintptr_t paddr; + + paddr = rte_mem_virt2phy(vaddr); + if (paddr == 0) { + /* + * The vaddr was valid but returned 0. Touch the page + * to ensure a backing page gets assigned, then call + * rte_mem_virt2phy() again. + */ + rte_atomic64_read((rte_atomic64_t *)vaddr); + paddr = rte_mem_virt2phy(vaddr); + } + + return paddr; +} + static uint64_t vtophys_get_pfn_2mb(uint64_t vfn_2mb) { @@ -149,7 +175,115 @@ vtophys_get_pfn_2mb(uint64_t vfn_2mb) } fprintf(stderr, "could not find 2MB vfn 0x%jx in DPDK mem config\n", vfn_2mb); - return -1; + return INVALID_PFN; +} + +static void +_spdk_vtophys_register_one(uint64_t vfn_2mb) +{ + struct map_2mb *map_2mb; + void *vaddr; + uint64_t paddr; + + map_2mb = vtophys_get_map(vfn_2mb); + if (!map_2mb) { + fprintf(stderr, "could not get vfn_2mb %p map\n", (void *)vfn_2mb); + return; + } + + if (map_2mb->pfn_2mb == INVALID_PFN) { + vaddr = (void *)(vfn_2mb << SHIFT_2MB); + paddr = vtophys_get_dpdk_paddr(vaddr); + if (paddr == RTE_BAD_PHYS_ADDR) { + fprintf(stderr, "could not get phys addr for %p\n", vaddr); + return; + } + + map_2mb->pfn_2mb = paddr >> SHIFT_2MB; + map_2mb->ref_count = 0; + } + + if (map_2mb->ref_count == VTOPHYS_MAX_REF_COUNT) { + fprintf(stderr, "ref count for %p already at %d\n", + (void *)(vfn_2mb << SHIFT_2MB), VTOPHYS_MAX_REF_COUNT); + return; + } + + map_2mb->ref_count++; +} + +static void +_spdk_vtophys_unregister_one(uint64_t vfn_2mb) +{ + struct map_2mb *map_2mb; + + map_2mb = vtophys_get_map(vfn_2mb); + if (!map_2mb) { + fprintf(stderr, "could not get vfn_2mb %p map\n", (void *)vfn_2mb); + return; + } + + if (map_2mb->pfn_2mb == INVALID_PFN || map_2mb->ref_count == 0) { + fprintf(stderr, "vaddr %p not registered\n", (void *)(vfn_2mb << SHIFT_2MB)); + return; + } + + map_2mb->ref_count--; + if (map_2mb->ref_count == 0) { + map_2mb->pfn_2mb = INVALID_PFN; + } +} + +void +spdk_vtophys_register(void *vaddr, uint64_t len) +{ + uint64_t vfn_2mb; + + if ((uintptr_t)vaddr & ~MASK_128TB) { + printf("invalid usermode virtual address %p\n", vaddr); + return; + } + + if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) { + fprintf(stderr, "invalid %s parameters, vaddr=%p len=%ju\n", + __func__, vaddr, len); + return; + } + + vfn_2mb = (uintptr_t)vaddr >> SHIFT_2MB; + len = len >> SHIFT_2MB; + + while (len > 0) { + _spdk_vtophys_register_one(vfn_2mb); + vfn_2mb++; + len--; + } +} + +void +spdk_vtophys_unregister(void *vaddr, uint64_t len) +{ + uint64_t vfn_2mb; + + if ((uintptr_t)vaddr & ~MASK_128TB) { + printf("invalid usermode virtual address %p\n", vaddr); + return; + } + + if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) { + fprintf(stderr, "invalid %s parameters, vaddr=%p len=%ju\n", + __func__, vaddr, len); + return; + } + + vfn_2mb = (uintptr_t)vaddr >> SHIFT_2MB; + len = len >> SHIFT_2MB; + + while (len > 0) { + _spdk_vtophys_unregister_one(vfn_2mb); + vfn_2mb++; + len--; + } } uint64_t @@ -172,9 +306,9 @@ spdk_vtophys(void *buf) } pfn_2mb = map_2mb->pfn_2mb; - if (pfn_2mb == SPDK_VTOPHYS_ERROR) { + if (pfn_2mb == INVALID_PFN) { pfn_2mb = vtophys_get_pfn_2mb(vfn_2mb); - if (pfn_2mb == SPDK_VTOPHYS_ERROR) { + if (pfn_2mb == INVALID_PFN) { return SPDK_VTOPHYS_ERROR; } map_2mb->pfn_2mb = pfn_2mb;