diff --git a/lib/env_dpdk/env_internal.h b/lib/env_dpdk/env_internal.h
index 2303f432c..670b8cc39 100644
--- a/lib/env_dpdk/env_internal.h
+++ b/lib/env_dpdk/env_internal.h
@@ -80,6 +80,7 @@ void pci_env_reinit(void);
 void pci_env_fini(void);
 int mem_map_init(bool legacy_mem);
 int vtophys_init(void);
+void vtophys_fini(void);
 
 /**
  * Report a DMA-capable PCI device to the vtophys translation code.
diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c
index 8778fbadb..e6464c93c 100644
--- a/lib/env_dpdk/init.c
+++ b/lib/env_dpdk/init.c
@@ -511,6 +511,8 @@ spdk_env_dpdk_post_init(bool legacy_mem)
 void
 spdk_env_dpdk_post_fini(void)
 {
+	vtophys_fini();
+
 	pci_env_fini();
 
 	free_args(g_eal_cmdline, g_eal_cmdline_argcount);
diff --git a/lib/env_dpdk/memory.c b/lib/env_dpdk/memory.c
index c6f920ed6..f49dcf4fd 100644
--- a/lib/env_dpdk/memory.c
+++ b/lib/env_dpdk/memory.c
@@ -35,6 +35,7 @@
 
 #include "env_internal.h"
 
+#include <rte_dev.h>
 #include <rte_config.h>
 #include <rte_memory.h>
 #include <rte_eal_memconfig.h>
@@ -1221,6 +1222,87 @@ vfio_noiommu_enabled(void)
 	return rte_vfio_noiommu_is_enabled();
 }
 
+static void
+vtophys_iommu_device_event(const char *device_name,
+			   enum rte_dev_event_type event,
+			   void *cb_arg)
+{
+	struct rte_dev_iterator dev_iter;
+	struct rte_device *dev;
+
+	pthread_mutex_lock(&g_vfio.mutex);
+
+	switch (event) {
+	default:
+	case RTE_DEV_EVENT_ADD:
+		RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+			if (strcmp(dev->name, device_name) == 0) {
+				struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+				if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+					/* This is a new PCI device using vfio */
+					g_vfio.device_ref++;
+				}
+				break;
+			}
+		}
+
+		if (g_vfio.device_ref == 1) {
+			struct spdk_vfio_dma_map *dma_map;
+			int ret;
+
+			/* This is the first device registered. This means that the first
+			 * IOMMU group might have just been added to the DPDK vfio container.
+			 * From this point it is certain that the memory can be mapped now.
+			 */
+			TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+				ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
+				if (ret) {
+					DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno);
+					break;
+				}
+			}
+		}
+		break;
+	case RTE_DEV_EVENT_REMOVE:
+		RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+			if (strcmp(dev->name, device_name) == 0) {
+				struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+				if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+					/* This is a PCI device using vfio */
+					g_vfio.device_ref--;
+				}
+				break;
+			}
+		}
+
+		if (g_vfio.device_ref == 0) {
+			struct spdk_vfio_dma_map *dma_map;
+			int ret;
+
+			/* If DPDK doesn't have any additional devices using its vfio container,
+			 * all the mappings will be automatically removed by the Linux vfio driver.
+			 * We unmap the memory manually to be able to easily re-map it later regardless
+			 * of other, external factors.
+			 */
+			TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+				struct vfio_iommu_type1_dma_unmap unmap = {};
+				unmap.argsz = sizeof(unmap);
+				unmap.flags = 0;
+				unmap.iova = dma_map->map.iova;
+				unmap.size = dma_map->map.size;
+				ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
+				if (ret) {
+					DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno);
+					break;
+				}
+			}
+		}
+		break;
+	}
+
+	pthread_mutex_unlock(&g_vfio.mutex);
+}
+
 static void
 vtophys_iommu_init(void)
 {
@@ -1229,6 +1311,9 @@ vtophys_iommu_init(void)
 	const char vfio_path[] = "/dev/vfio/vfio";
 	DIR *dir;
 	struct dirent *d;
+	struct rte_dev_iterator dev_iter;
+	struct rte_device *dev;
+	int rc;
 	if (!vfio_enabled()) {
 		return;
 	}
@@ -1269,10 +1354,47 @@ vtophys_iommu_init(void)
 		return;
 	}
 
+	/* If the IOMMU is enabled, we need to track whether there are any devices present because
+	 * it's only valid to perform vfio IOCTLs to the containers when there is at least
+	 * one device. The device may be a DPDK device that SPDK doesn't otherwise know about, but
+	 * that's ok.
+	 */
+	RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+		struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+
+		if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+			/* This is a PCI device using vfio */
+			g_vfio.device_ref++;
+		}
+	}
+
+	if (spdk_process_is_primary()) {
+		rc = rte_dev_event_callback_register(NULL, vtophys_iommu_device_event, NULL);
+		if (rc) {
+			DEBUG_PRINT("Failed to register device event callback\n");
+			return;
+		}
+		rc = rte_dev_event_monitor_start();
+		if (rc) {
+			DEBUG_PRINT("Failed to start device event monitoring.\n");
+			return;
+		}
+	}
+
 	g_vfio.enabled = true;
 
 	return;
 }
+
+static void
+vtophys_iommu_fini(void)
+{
+	if (spdk_process_is_primary()) {
+		rte_dev_event_callback_unregister(NULL, vtophys_iommu_device_event, NULL);
+		rte_dev_event_monitor_stop();
+	}
+}
+
 #endif
 
 void
@@ -1290,35 +1412,6 @@ vtophys_pci_device_added(struct rte_pci_device *pci_device)
 		DEBUG_PRINT("Memory allocation error\n");
 	}
 	pthread_mutex_unlock(&g_vtophys_pci_devices_mutex);
-
-#if VFIO_ENABLED
-	struct spdk_vfio_dma_map *dma_map;
-	int ret;
-
-	if (!g_vfio.enabled) {
-		return;
-	}
-
-	pthread_mutex_lock(&g_vfio.mutex);
-	g_vfio.device_ref++;
-	if (g_vfio.device_ref > 1) {
-		pthread_mutex_unlock(&g_vfio.mutex);
-		return;
-	}
-
-	/* This is the first SPDK device using DPDK vfio. This means that the first
-	 * IOMMU group might have been just been added to the DPDK vfio container.
-	 * From this point it is certain that the memory can be mapped now.
-	 */
-	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
-		ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
-		if (ret) {
-			DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno);
-			break;
-		}
-	}
-	pthread_mutex_unlock(&g_vfio.mutex);
-#endif
 }
 
 void
@@ -1335,43 +1428,6 @@ vtophys_pci_device_removed(struct rte_pci_device *pci_device)
 		}
 	}
 	pthread_mutex_unlock(&g_vtophys_pci_devices_mutex);
-
-#if VFIO_ENABLED
-	struct spdk_vfio_dma_map *dma_map;
-	int ret;
-
-	if (!g_vfio.enabled) {
-		return;
-	}
-
-	pthread_mutex_lock(&g_vfio.mutex);
-	assert(g_vfio.device_ref > 0);
-	g_vfio.device_ref--;
-	if (g_vfio.device_ref > 0) {
-		pthread_mutex_unlock(&g_vfio.mutex);
-		return;
-	}
-
-	/* This is the last SPDK device using DPDK vfio. If DPDK doesn't have
-	 * any additional devices using it's vfio container, all the mappings
-	 * will be automatically removed by the Linux vfio driver. We unmap
-	 * the memory manually to be able to easily re-map it later regardless
-	 * of other, external factors.
-	 */
-	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
-		struct vfio_iommu_type1_dma_unmap unmap = {};
-		unmap.argsz = sizeof(unmap);
-		unmap.flags = 0;
-		unmap.iova = dma_map->map.iova;
-		unmap.size = dma_map->map.size;
-		ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
-		if (ret) {
-			DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno);
-			break;
-		}
-	}
-	pthread_mutex_unlock(&g_vfio.mutex);
-#endif
 }
 
 int
@@ -1405,6 +1461,14 @@ vtophys_init(void)
 	return 0;
 }
 
+void
+vtophys_fini(void)
+{
+#if VFIO_ENABLED
+	vtophys_iommu_fini();
+#endif
+}
+
 uint64_t
 spdk_vtophys(const void *buf, uint64_t *size)
 {
diff --git a/test/env/memory/memory_ut.c b/test/env/memory/memory_ut.c
index e0583375f..696cc7c1a 100644
--- a/test/env/memory/memory_ut.c
+++ b/test/env/memory/memory_ut.c
@@ -57,6 +57,14 @@ DEFINE_STUB(rte_vfio_noiommu_is_enabled, int, (void), 0);
 DEFINE_STUB(rte_memseg_get_fd_thread_unsafe, int, (const struct rte_memseg *ms), 0);
 DEFINE_STUB(rte_memseg_get_fd_offset_thread_unsafe, int, (const struct rte_memseg *ms,
 		size_t *offset), 0);
+DEFINE_STUB(rte_dev_iterator_init, int, (struct rte_dev_iterator *it, const char *dev_str), 0);
+DEFINE_STUB(rte_dev_iterator_next, struct rte_device *, (struct rte_dev_iterator *it), NULL);
+DEFINE_STUB(rte_dev_event_callback_register, int, (const char *device_name,
+		rte_dev_event_cb_fn cb_fn, void *cb_arg), 0);
+DEFINE_STUB(rte_dev_event_callback_unregister, int, (const char *device_name,
+		rte_dev_event_cb_fn cb_fn, void *cb_arg), 0);
+DEFINE_STUB(rte_dev_event_monitor_start, int, (void), 0);
+DEFINE_STUB(rte_dev_event_monitor_stop, int, (void), 0);
 
 static int
 test_mem_map_notify(void *cb_ctx, struct spdk_mem_map *map,
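
Note: the sketch below is not part of the patch. It is a minimal standalone illustration of the DPDK device-event API that the new vtophys_iommu_device_event()/vtophys_iommu_init()/vtophys_iommu_fini() code relies on (rte_dev_event_callback_register(), rte_dev_event_monitor_start() and their counterparts). It assumes a host with DPDK installed and keeps error handling minimal for brevity; the callback body here only prints, whereas the patch updates g_vfio.device_ref and (re)maps DMA regions.

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>
#include <unistd.h>

#include <rte_eal.h>
#include <rte_dev.h>

static void
device_event_cb(const char *device_name, enum rte_dev_event_type event, void *cb_arg)
{
	(void)cb_arg;
	/* Same callback signature as vtophys_iommu_device_event() in the patch. */
	printf("device %s was %s\n", device_name,
	       event == RTE_DEV_EVENT_ADD ? "added" : "removed");
}

int
main(int argc, char **argv)
{
	if (rte_eal_init(argc, argv) < 0) {
		return 1;
	}

	/* A NULL device name subscribes to events for every device; this is how the patch
	 * can keep its vfio device count accurate even for PCI devices that DPDK attaches
	 * without SPDK's involvement.
	 */
	if (rte_dev_event_callback_register(NULL, device_event_cb, NULL) != 0 ||
	    rte_dev_event_monitor_start() != 0) {
		fprintf(stderr, "failed to enable device event monitoring\n");
		rte_eal_cleanup();
		return 1;
	}

	sleep(60); /* hot-plug or unbind a PCI device while this waits */

	rte_dev_event_monitor_stop();
	rte_dev_event_callback_unregister(NULL, device_event_cb, NULL);
	rte_eal_cleanup();
	return 0;
}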