From 8f7d9ec2f4775c5627265c3c5dcce96397598c8f Mon Sep 17 00:00:00 2001
From: Ben Walker
Date: Thu, 20 Aug 2020 12:25:27 -0700
Subject: [PATCH] env/dpdk: Use the DPDK device count for IOMMU mapping

The VFIO container must have at least one device present to be able to
perform DMA mapping operations. Instead of using the count of SPDK
devices, use the count of DPDK devices. This allows the use of SPDK's
memory management APIs even if only a DPDK device is attached.

Change-Id: Ie7e21f09bdf1cdf1a85424c35212f64f24ae4e26
Signed-off-by: Ben Walker
Signed-off-by: Maciej Szwed
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3874
Tested-by: SPDK CI Jenkins
Reviewed-by: Changpeng Liu
Reviewed-by: Tomasz Zawadzki
---
 lib/env_dpdk/env_internal.h |   1 +
 lib/env_dpdk/init.c         |   2 +
 lib/env_dpdk/memory.c       | 196 ++++++++++++++++++++++++------------
 test/env/memory/memory_ut.c |   8 ++
 4 files changed, 141 insertions(+), 66 deletions(-)

diff --git a/lib/env_dpdk/env_internal.h b/lib/env_dpdk/env_internal.h
index 2303f432c..670b8cc39 100644
--- a/lib/env_dpdk/env_internal.h
+++ b/lib/env_dpdk/env_internal.h
@@ -80,6 +80,7 @@ void pci_env_reinit(void);
 void pci_env_fini(void);
 int mem_map_init(bool legacy_mem);
 int vtophys_init(void);
+void vtophys_fini(void);
 
 /**
  * Report a DMA-capable PCI device to the vtophys translation code.
diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c
index 8778fbadb..e6464c93c 100644
--- a/lib/env_dpdk/init.c
+++ b/lib/env_dpdk/init.c
@@ -511,6 +511,8 @@ spdk_env_dpdk_post_init(bool legacy_mem)
 void
 spdk_env_dpdk_post_fini(void)
 {
+	vtophys_fini();
+
 	pci_env_fini();
 
 	free_args(g_eal_cmdline, g_eal_cmdline_argcount);
diff --git a/lib/env_dpdk/memory.c b/lib/env_dpdk/memory.c
index c6f920ed6..f49dcf4fd 100644
--- a/lib/env_dpdk/memory.c
+++ b/lib/env_dpdk/memory.c
@@ -35,6 +35,7 @@
 
 #include "env_internal.h"
 
+#include <rte_dev.h>
 #include <rte_config.h>
 #include <rte_memory.h>
 #include <rte_eal_memconfig.h>
@@ -1221,6 +1222,87 @@ vfio_noiommu_enabled(void)
 	return rte_vfio_noiommu_is_enabled();
 }
 
+static void
+vtophys_iommu_device_event(const char *device_name,
+			   enum rte_dev_event_type event,
+			   void *cb_arg)
+{
+	struct rte_dev_iterator dev_iter;
+	struct rte_device *dev;
+
+	pthread_mutex_lock(&g_vfio.mutex);
+
+	switch (event) {
+	default:
+	case RTE_DEV_EVENT_ADD:
+		RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+			if (strcmp(dev->name, device_name) == 0) {
+				struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+				if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+					/* This is a new PCI device using vfio */
+					g_vfio.device_ref++;
+				}
+				break;
+			}
+		}
+
+		if (g_vfio.device_ref == 1) {
+			struct spdk_vfio_dma_map *dma_map;
+			int ret;
+
+			/* This is the first device registered. This means that the first
+			 * IOMMU group might have just been added to the DPDK vfio container.
+			 * From this point it is certain that the memory can be mapped now.
+			 */
+			TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+				ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
+				if (ret) {
+					DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno);
+					break;
+				}
+			}
+		}
+		break;
+	case RTE_DEV_EVENT_REMOVE:
+		RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+			if (strcmp(dev->name, device_name) == 0) {
+				struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+				if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+					/* This is a PCI device using vfio */
+					g_vfio.device_ref--;
+				}
+				break;
+			}
+		}
+
+		if (g_vfio.device_ref == 0) {
+			struct spdk_vfio_dma_map *dma_map;
+			int ret;
+
+			/* If DPDK doesn't have any additional devices using its vfio container,
+			 * all the mappings will be automatically removed by the Linux vfio driver.
+			 * We unmap the memory manually to be able to easily re-map it later regardless
+			 * of other, external factors.
+			 */
+			TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+				struct vfio_iommu_type1_dma_unmap unmap = {};
+				unmap.argsz = sizeof(unmap);
+				unmap.flags = 0;
+				unmap.iova = dma_map->map.iova;
+				unmap.size = dma_map->map.size;
+				ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
+				if (ret) {
+					DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno);
+					break;
+				}
+			}
+		}
+		break;
+	}
+
+	pthread_mutex_unlock(&g_vfio.mutex);
+}
+
 static void
 vtophys_iommu_init(void)
 {
@@ -1229,6 +1311,9 @@ vtophys_iommu_init(void)
 	const char vfio_path[] = "/dev/vfio/vfio";
 	DIR *dir;
 	struct dirent *d;
+	struct rte_dev_iterator dev_iter;
+	struct rte_device *dev;
+	int rc;
 
 	if (!vfio_enabled()) {
 		return;
 	}
@@ -1269,10 +1354,47 @@ vtophys_iommu_init(void)
 		return;
 	}
 
+	/* If the IOMMU is enabled, we need to track whether there are any devices present because
+	 * it's only valid to perform vfio IOCTLs to the container when there is at least
+	 * one device. The device may be a DPDK device that SPDK doesn't otherwise know about, but
+	 * that's ok.
+	 */
+	RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+		struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+
+		if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+			/* This is a PCI device using vfio */
+			g_vfio.device_ref++;
+		}
+	}
+
+	if (spdk_process_is_primary()) {
+		rc = rte_dev_event_callback_register(NULL, vtophys_iommu_device_event, NULL);
+		if (rc) {
+			DEBUG_PRINT("Failed to register device event callback\n");
+			return;
+		}
+		rc = rte_dev_event_monitor_start();
+		if (rc) {
+			DEBUG_PRINT("Failed to start device event monitoring.\n");
+			return;
+		}
+	}
+
 	g_vfio.enabled = true;
 
 	return;
 }
+
+static void
+vtophys_iommu_fini(void)
+{
+	if (spdk_process_is_primary()) {
+		rte_dev_event_callback_unregister(NULL, vtophys_iommu_device_event, NULL);
+		rte_dev_event_monitor_stop();
+	}
+}
+
 #endif
 
 void
@@ -1290,35 +1412,6 @@ vtophys_pci_device_added(struct rte_pci_device *pci_device)
 		DEBUG_PRINT("Memory allocation error\n");
 	}
 	pthread_mutex_unlock(&g_vtophys_pci_devices_mutex);
-
-#if VFIO_ENABLED
-	struct spdk_vfio_dma_map *dma_map;
-	int ret;
-
-	if (!g_vfio.enabled) {
-		return;
-	}
-
-	pthread_mutex_lock(&g_vfio.mutex);
-	g_vfio.device_ref++;
-	if (g_vfio.device_ref > 1) {
-		pthread_mutex_unlock(&g_vfio.mutex);
-		return;
-	}
-
-	/* This is the first SPDK device using DPDK vfio. This means that the first
-	 * IOMMU group might have been just been added to the DPDK vfio container.
-	 * From this point it is certain that the memory can be mapped now.
-	 */
-	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
-		ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
-		if (ret) {
-			DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno);
-			break;
-		}
-	}
-	pthread_mutex_unlock(&g_vfio.mutex);
-#endif
 }
 
 void
@@ -1335,43 +1428,6 @@ vtophys_pci_device_removed(struct rte_pci_device *pci_device)
 		}
 	}
 	pthread_mutex_unlock(&g_vtophys_pci_devices_mutex);
-
-#if VFIO_ENABLED
-	struct spdk_vfio_dma_map *dma_map;
-	int ret;
-
-	if (!g_vfio.enabled) {
-		return;
-	}
-
-	pthread_mutex_lock(&g_vfio.mutex);
-	assert(g_vfio.device_ref > 0);
-	g_vfio.device_ref--;
-	if (g_vfio.device_ref > 0) {
-		pthread_mutex_unlock(&g_vfio.mutex);
-		return;
-	}
-
-	/* This is the last SPDK device using DPDK vfio. If DPDK doesn't have
-	 * any additional devices using it's vfio container, all the mappings
-	 * will be automatically removed by the Linux vfio driver. We unmap
-	 * the memory manually to be able to easily re-map it later regardless
-	 * of other, external factors.
-	 */
-	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
-		struct vfio_iommu_type1_dma_unmap unmap = {};
-		unmap.argsz = sizeof(unmap);
-		unmap.flags = 0;
-		unmap.iova = dma_map->map.iova;
-		unmap.size = dma_map->map.size;
-		ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
-		if (ret) {
-			DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno);
-			break;
-		}
-	}
-	pthread_mutex_unlock(&g_vfio.mutex);
-#endif
 }
 
 int
@@ -1405,6 +1461,14 @@ vtophys_init(void)
 	return 0;
 }
 
+void
+vtophys_fini(void)
+{
+#if VFIO_ENABLED
+	vtophys_iommu_fini();
+#endif
+}
+
 uint64_t
 spdk_vtophys(const void *buf, uint64_t *size)
 {
diff --git a/test/env/memory/memory_ut.c b/test/env/memory/memory_ut.c
index e0583375f..696cc7c1a 100644
--- a/test/env/memory/memory_ut.c
+++ b/test/env/memory/memory_ut.c
@@ -57,6 +57,14 @@ DEFINE_STUB(rte_vfio_noiommu_is_enabled, int, (void), 0);
 DEFINE_STUB(rte_memseg_get_fd_thread_unsafe, int, (const struct rte_memseg *ms), 0);
 DEFINE_STUB(rte_memseg_get_fd_offset_thread_unsafe, int, (const struct rte_memseg *ms,
 		size_t *offset), 0);
+DEFINE_STUB(rte_dev_iterator_init, int, (struct rte_dev_iterator *it, const char *dev_str), 0);
+DEFINE_STUB(rte_dev_iterator_next, struct rte_device *, (struct rte_dev_iterator *it), NULL);
+DEFINE_STUB(rte_dev_event_callback_register, int, (const char *device_name,
+		rte_dev_event_cb_fn cb_fn, void *cb_arg), 0);
+DEFINE_STUB(rte_dev_event_callback_unregister, int, (const char *device_name,
+		rte_dev_event_cb_fn cb_fn, void *cb_arg), 0);
+DEFINE_STUB(rte_dev_event_monitor_start, int, (void), 0);
+DEFINE_STUB(rte_dev_event_monitor_stop, int, (void), 0);
 
 static int
 test_mem_map_notify(void *cb_ctx, struct spdk_mem_map *map,
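
Note for reviewers: the heart of this change is the vfio type1 rule that DMA ioctls
are only honored once the container holds at least one device, so the saved mappings
must be replayed when the first vfio device appears and torn down explicitly when the
last one leaves. The standalone C sketch below isolates that replay pattern. It is
illustrative only and not part of the patch: dma_map_entry, replay_dma_maps,
drop_dma_maps, and the container_fd parameter are hypothetical stand-ins for the
g_vfio state in lib/env_dpdk/memory.c.

/*
 * Minimal sketch of the map/unmap replay pattern, assuming a vfio type1
 * container fd that currently has at least one IOMMU group attached.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

struct dma_map_entry {
	struct vfio_iommu_type1_dma_map map;	/* saved VFIO_IOMMU_MAP_DMA arguments */
	struct dma_map_entry *next;
};

/* Replay every saved mapping. Only valid while the container holds a device;
 * this corresponds to the first RTE_DEV_EVENT_ADD in the patch above. */
static int
replay_dma_maps(int container_fd, struct dma_map_entry *head)
{
	struct dma_map_entry *e;

	for (e = head; e != NULL; e = e->next) {
		if (ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &e->map) != 0) {
			fprintf(stderr, "MAP_DMA failed: %d\n", errno);
			return -errno;
		}
	}
	return 0;
}

/* Explicitly unmap everything, mirroring the RTE_DEV_EVENT_REMOVE path, so
 * the saved list can be replayed cleanly the next time a device appears. */
static int
drop_dma_maps(int container_fd, struct dma_map_entry *head)
{
	struct dma_map_entry *e;

	for (e = head; e != NULL; e = e->next) {
		struct vfio_iommu_type1_dma_unmap unmap = {
			.argsz = sizeof(unmap),
			.flags = 0,
			.iova = e->map.iova,
			.size = e->map.size,
		};

		if (ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap) != 0) {
			fprintf(stderr, "UNMAP_DMA failed: %d\n", errno);
			return -errno;
		}
	}
	return 0;
}

Keeping the unmap explicit, rather than relying on the kernel to drop the mappings
when the container loses its last group, is what lets the same list be replayed
verbatim on the next device-add event.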