diff --git a/lib/env_dpdk/env_internal.h b/lib/env_dpdk/env_internal.h
index 2f5826c8b..c5824632e 100644
--- a/lib/env_dpdk/env_internal.h
+++ b/lib/env_dpdk/env_internal.h
@@ -87,4 +87,16 @@ int spdk_pci_device_attach(struct spdk_pci_enum_ctx *ctx, spdk_pci_enum_cb enum_
 void spdk_mem_map_init(void);
 void spdk_vtophys_init(void);
 
+/**
+ * Increase the refcount of active DMA-capable devices managed by SPDK.
+ * This must be called after a `rte_pci_device` is created.
+ */
+void spdk_vtophys_get_ref(void);
+
+/**
+ * Decrease the refcount of active DMA-capable devices managed by SPDK.
+ * This must be called before a `rte_pci_device` is destroyed.
+ */
+void spdk_vtophys_put_ref(void);
+
 #endif
diff --git a/lib/env_dpdk/pci.c b/lib/env_dpdk/pci.c
index 63f1a7373..73bfbf51e 100644
--- a/lib/env_dpdk/pci.c
+++ b/lib/env_dpdk/pci.c
@@ -45,6 +45,7 @@ spdk_pci_device_init(struct rte_pci_driver *driver,
 		     struct rte_pci_device *device)
 {
 	struct spdk_pci_enum_ctx *ctx = (struct spdk_pci_enum_ctx *)driver;
+	int rc;
 
 	if (!ctx->cb_fn) {
 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
@@ -66,12 +67,19 @@
 		usleep(500 * 1000);
 	}
 
-	return ctx->cb_fn(ctx->cb_arg, (struct spdk_pci_device *)device);
+	rc = ctx->cb_fn(ctx->cb_arg, (struct spdk_pci_device *)device);
+	if (rc != 0) {
+		return rc;
+	}
+
+	spdk_vtophys_get_ref();
+	return 0;
 }
 
 int
 spdk_pci_device_fini(struct rte_pci_device *device)
 {
+	spdk_vtophys_put_ref();
 	return 0;
 }
 
diff --git a/lib/env_dpdk/vtophys.c b/lib/env_dpdk/vtophys.c
index 52fa3e475..270af6776 100644
--- a/lib/env_dpdk/vtophys.c
+++ b/lib/env_dpdk/vtophys.c
@@ -56,15 +56,27 @@
 /* Internal DPDK function forward declaration */
 int pci_vfio_is_enabled(void);
 
+struct spdk_vfio_dma_map {
+	struct vfio_iommu_type1_dma_map map;
+	TAILQ_ENTRY(spdk_vfio_dma_map) tailq;
+};
+
 struct vfio_cfg {
 	int fd;
 	bool enabled;
+	unsigned device_ref;
+	TAILQ_HEAD(, spdk_vfio_dma_map) maps;
+	pthread_mutex_t mutex;
 };
 
 static struct vfio_cfg g_vfio = {
 	.fd = -1,
-	.enabled = false
+	.enabled = false,
+	.device_ref = 0,
+	.maps = TAILQ_HEAD_INITIALIZER(g_vfio.maps),
+	.mutex = PTHREAD_MUTEX_INITIALIZER
 };
+
 #else
 #define SPDK_VFIO_ENABLED 0
 #endif
@@ -82,42 +94,98 @@ static struct spdk_mem_map *g_vtophys_map;
 static int
 vtophys_iommu_map_dma(uint64_t vaddr, uint64_t iova, uint64_t size)
 {
-	struct vfio_iommu_type1_dma_map dma_map;
+	struct spdk_vfio_dma_map *dma_map;
 	int ret;
 
-	dma_map.argsz = sizeof(dma_map);
-	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
-	dma_map.vaddr = vaddr;
-	dma_map.iova = iova;
-	dma_map.size = size;
-
-	ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
-	if (ret) {
-		DEBUG_PRINT("Cannot set up DMA mapping, error %d\n", errno);
+	dma_map = calloc(1, sizeof(*dma_map));
+	if (dma_map == NULL) {
+		return -ENOMEM;
 	}
 
-	return ret;
+	dma_map->map.argsz = sizeof(dma_map->map);
+	dma_map->map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+	dma_map->map.vaddr = vaddr;
+	dma_map->map.iova = iova;
+	dma_map->map.size = size;
+
+	pthread_mutex_lock(&g_vfio.mutex);
+	if (g_vfio.device_ref == 0) {
+		/* VFIO requires at least one device (IOMMU group) to be added to
+		 * a VFIO container before it is possible to perform any IOMMU
+		 * operations on that container. This memory will be mapped once
+		 * the first device (IOMMU group) is hotplugged.
+		 *
+		 * Since the vfio container is managed internally by DPDK, it is
+		 * also possible that some device is already in that container, but
+		 * it's not managed by SPDK - e.g. a NIC attached internally
+		 * inside DPDK. We could map the memory straight away in such a
+		 * scenario, but there's no need to do it. DPDK devices clearly
+		 * don't need our mappings and hence we defer the mapping
+		 * unconditionally until the first SPDK-managed device is
+		 * hotplugged.
+		 */
+		goto out_insert;
+	}
+
+	ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
+	if (ret) {
+		DEBUG_PRINT("Cannot set up DMA mapping, error %d\n", errno);
+		pthread_mutex_unlock(&g_vfio.mutex);
+		free(dma_map);
+		return ret;
+	}
+
+out_insert:
+	TAILQ_INSERT_TAIL(&g_vfio.maps, dma_map, tailq);
+	pthread_mutex_unlock(&g_vfio.mutex);
+	return 0;
 }
 
 static int
 vtophys_iommu_unmap_dma(uint64_t iova, uint64_t size)
 {
 	struct vfio_iommu_type1_dma_unmap dma_unmap;
+	struct spdk_vfio_dma_map *dma_map;
 	int ret;
 
+	pthread_mutex_lock(&g_vfio.mutex);
+	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+		if (dma_map->map.iova == iova) {
+			break;
+		}
+	}
+
+	if (dma_map == NULL) {
+		DEBUG_PRINT("Cannot clear DMA mapping for IOVA %"PRIx64" - it's not mapped\n", iova);
+		pthread_mutex_unlock(&g_vfio.mutex);
+		return -ENXIO;
+	}
+
+	/* Don't support partial or multiple-page unmaps for now */
+	assert(dma_map->map.size == size);
+
+	if (g_vfio.device_ref == 0) {
+		/* Memory is not mapped anymore, just remove its references */
+		goto out_remove;
+	}
+
 	dma_unmap.argsz = sizeof(dma_unmap);
 	dma_unmap.flags = 0;
 	dma_unmap.iova = iova;
 	dma_unmap.size = size;
 
 	ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
-
 	if (ret) {
 		DEBUG_PRINT("Cannot clear DMA mapping, error %d\n", errno);
+		pthread_mutex_unlock(&g_vfio.mutex);
+		return ret;
 	}
 
-	return ret;
+out_remove:
+	TAILQ_REMOVE(&g_vfio.maps, dma_map, tailq);
+	pthread_mutex_unlock(&g_vfio.mutex);
+	free(dma_map);
+	return 0;
 }
 
 #endif
@@ -312,6 +380,75 @@ spdk_vtophys_iommu_init(void)
 }
 #endif
 
+void
+spdk_vtophys_get_ref(void)
+{
+#if SPDK_VFIO_ENABLED
+	struct spdk_vfio_dma_map *dma_map;
+	int ret;
+
+	if (!g_vfio.enabled) {
+		return;
+	}
+
+	pthread_mutex_lock(&g_vfio.mutex);
+	g_vfio.device_ref++;
+	if (g_vfio.device_ref > 1) {
+		pthread_mutex_unlock(&g_vfio.mutex);
+		return;
+	}
+
+	/* This is the first SPDK device using DPDK vfio. This means that the first
+	 * IOMMU group might have just been added to the DPDK vfio container.
+	 * From this point on, it is certain that the memory can be mapped.
+	 */
+	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+		ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
+		if (ret) {
+			DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno);
+			break;
+		}
+	}
+	pthread_mutex_unlock(&g_vfio.mutex);
+#endif
+}
+
+void
+spdk_vtophys_put_ref(void)
+{
+#if SPDK_VFIO_ENABLED
+	struct spdk_vfio_dma_map *dma_map;
+	int ret;
+
+	if (!g_vfio.enabled) {
+		return;
+	}
+
+	pthread_mutex_lock(&g_vfio.mutex);
+	assert(g_vfio.device_ref > 0);
+	g_vfio.device_ref--;
+	if (g_vfio.device_ref > 0) {
+		pthread_mutex_unlock(&g_vfio.mutex);
+		return;
+	}
+
+	/* This is the last SPDK device using DPDK vfio. If DPDK doesn't have
+	 * any additional devices using its vfio container, all the mappings
+	 * will be automatically removed by the Linux vfio driver. We unmap
+	 * the memory manually to be able to easily re-map it later regardless
+	 * of any external factors.
+	 */
+	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+		ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &dma_map->map);
+		if (ret) {
+			DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno);
+			break;
+		}
+	}
+	pthread_mutex_unlock(&g_vfio.mutex);
+#endif
+}
+
 void
 spdk_vtophys_init(void)
 {
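For reference (not part of the patch): the deferred-mapping scheme above records every vtophys registration that arrives while no SPDK-managed device is attached, and the first spdk_vtophys_get_ref() replays those records against the now-usable vfio container. The sketch below is a minimal, self-contained illustration of that pattern under simplified assumptions; mock_map_region(), register_region(), device_attach(), g_maps and g_device_ref are hypothetical stand-ins for the VFIO_IOMMU_MAP_DMA ioctl and the g_vfio state in vtophys.c.

/* Illustrative sketch only - mirrors the refcount-gated deferred mapping
 * pattern from vtophys.c using plain stdlib primitives. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct mapping {
	unsigned long vaddr, iova, size;
	TAILQ_ENTRY(mapping) tailq;
};

static TAILQ_HEAD(, mapping) g_maps = TAILQ_HEAD_INITIALIZER(g_maps);
static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned g_device_ref;

static void
mock_map_region(struct mapping *m)
{
	/* stand-in for ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &m->map) */
	printf("map vaddr=%#lx iova=%#lx size=%#lx\n", m->vaddr, m->iova, m->size);
}

/* Analogous to vtophys_iommu_map_dma(): always record the region, but only
 * map it right away if a device already holds the vfio container open. */
static int
register_region(unsigned long vaddr, unsigned long iova, unsigned long size)
{
	struct mapping *m = calloc(1, sizeof(*m));

	if (m == NULL) {
		return -1;
	}
	m->vaddr = vaddr;
	m->iova = iova;
	m->size = size;

	pthread_mutex_lock(&g_mutex);
	if (g_device_ref > 0) {
		mock_map_region(m);
	}
	/* else: deferred until the first device is attached */
	TAILQ_INSERT_TAIL(&g_maps, m, tailq);
	pthread_mutex_unlock(&g_mutex);
	return 0;
}

/* Analogous to spdk_vtophys_get_ref(): the first attached device replays
 * every deferred registration against the container. */
static void
device_attach(void)
{
	struct mapping *m;

	pthread_mutex_lock(&g_mutex);
	if (++g_device_ref == 1) {
		TAILQ_FOREACH(m, &g_maps, tailq) {
			mock_map_region(m);
		}
	}
	pthread_mutex_unlock(&g_mutex);
}

int
main(void)
{
	register_region(0x1000, 0x1000, 0x1000);	/* deferred: no device yet */
	device_attach();				/* replays the pending mapping */
	register_region(0x2000, 0x2000, 0x1000);	/* mapped immediately */
	return 0;
}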