From d5c8c6432c9e73e5d12ce49e29bea275d0699448 Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Fri, 20 Aug 2021 17:00:33 +0800 Subject: [PATCH] env/dpdk: revert 8f7d9ec "env/dpdk: Use the DPDK device count for IOMMU mapping" This patch revert commit 8f7d9ec. In function vtophys_iommu_init(), we can use `dev->drvier` in RTE_DEV_FOREACH() loop to count number of devices probed by device driver using vfio APIs, or we will count all the PCI devices that bind to vfio-pci driver, only the probed device's IOMMU group is added to vfio container. The original implementation is correct to count `g_vfio.device_ref` in vtophys_pci_device_added(), we don't need to count it in vtophys_iommu_device_event() callback. Fix issue #2086. Change-Id: Ib1502a67960a49e9a2f93823cc8ceab2e8303134 Signed-off-by: Changpeng Liu Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9236 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Tomasz Zawadzki Reviewed-by: Dong Yi Reviewed-by: Jim Harris --- lib/env_dpdk/env_internal.h | 1 - lib/env_dpdk/init.c | 2 - lib/env_dpdk/memory.c | 207 ++++++++++++------------------------ test/env/memory/memory_ut.c | 8 -- 4 files changed, 66 insertions(+), 152 deletions(-) diff --git a/lib/env_dpdk/env_internal.h b/lib/env_dpdk/env_internal.h index 670b8cc39..2303f432c 100644 --- a/lib/env_dpdk/env_internal.h +++ b/lib/env_dpdk/env_internal.h @@ -80,7 +80,6 @@ void pci_env_reinit(void); void pci_env_fini(void); int mem_map_init(bool legacy_mem); int vtophys_init(void); -void vtophys_fini(void); /** * Report a DMA-capable PCI device to the vtophys translation code. diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c index 5854bd9cb..3a77aa981 100644 --- a/lib/env_dpdk/init.c +++ b/lib/env_dpdk/init.c @@ -531,8 +531,6 @@ spdk_env_dpdk_post_init(bool legacy_mem) void spdk_env_dpdk_post_fini(void) { - vtophys_fini(); - pci_env_fini(); free_args(g_eal_cmdline, g_eal_cmdline_argcount); diff --git a/lib/env_dpdk/memory.c b/lib/env_dpdk/memory.c index 707a51d08..62c594227 100644 --- a/lib/env_dpdk/memory.c +++ b/lib/env_dpdk/memory.c @@ -35,7 +35,6 @@ #include "env_internal.h" -#include #include #include #include @@ -1218,95 +1217,6 @@ vfio_noiommu_enabled(void) return rte_vfio_noiommu_is_enabled(); } -static void -vtophys_iommu_device_event(const char *device_name, - enum rte_dev_event_type event, - void *cb_arg) -{ - struct rte_dev_iterator dev_iter; - struct rte_device *dev; - - pthread_mutex_lock(&g_vfio.mutex); - - switch (event) { - default: - case RTE_DEV_EVENT_ADD: - RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) { - if (strcmp(dev->name, device_name) == 0) { - struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev); -#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) - if (pci_dev->kdrv == RTE_KDRV_VFIO) { -#else - if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) { -#endif - /* This is a new PCI device using vfio */ - g_vfio.device_ref++; - } - break; - } - } - - if (g_vfio.device_ref == 1) { - struct spdk_vfio_dma_map *dma_map; - int ret; - - /* This is the first device registered. This means that the first - * IOMMU group might have been just been added to the DPDK vfio container. - * From this point it is certain that the memory can be mapped now. - */ - TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) { - ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map); - if (ret) { - DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno); - break; - } - } - } - break; - case RTE_DEV_EVENT_REMOVE: - RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) { - if (strcmp(dev->name, device_name) == 0) { - struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev); -#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) - if (pci_dev->kdrv == RTE_KDRV_VFIO) { -#else - if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) { -#endif - /* This is a PCI device using vfio */ - g_vfio.device_ref--; - } - break; - } - } - - if (g_vfio.device_ref == 0) { - struct spdk_vfio_dma_map *dma_map; - int ret; - - /* If DPDK doesn't have any additional devices using it's vfio container, - * all the mappings will be automatically removed by the Linux vfio driver. - * We unmap the memory manually to be able to easily re-map it later regardless - * of other, external factors. - */ - TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) { - struct vfio_iommu_type1_dma_unmap unmap = {}; - unmap.argsz = sizeof(unmap); - unmap.flags = 0; - unmap.iova = dma_map->map.iova; - unmap.size = dma_map->map.size; - ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap); - if (ret) { - DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno); - break; - } - } - } - break; - } - - pthread_mutex_unlock(&g_vfio.mutex); -} - static void vtophys_iommu_init(void) { @@ -1315,9 +1225,6 @@ vtophys_iommu_init(void) const char vfio_path[] = "/dev/vfio/vfio"; DIR *dir; struct dirent *d; - struct rte_dev_iterator dev_iter; - struct rte_device *dev; - int rc; if (!vfio_enabled()) { return; @@ -1358,51 +1265,11 @@ vtophys_iommu_init(void) return; } - /* If the IOMMU is enabled, we need to track whether there are any devices present because - * it's only valid to perform vfio IOCTLs to the containers when there is at least - * one device. The device may be a DPDK device that SPDK doesn't otherwise know about, but - * that's ok. - */ - RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) { - struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev); - -#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) - if (pci_dev->kdrv == RTE_KDRV_VFIO) { -#else - if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) { -#endif - /* This is a PCI device using vfio */ - g_vfio.device_ref++; - } - } - - if (spdk_process_is_primary()) { - rc = rte_dev_event_callback_register(NULL, vtophys_iommu_device_event, NULL); - if (rc) { - DEBUG_PRINT("Failed to register device event callback\n"); - return; - } - rc = rte_dev_event_monitor_start(); - if (rc) { - DEBUG_PRINT("Failed to start device event monitoring.\n"); - return; - } - } - g_vfio.enabled = true; return; } -static void -vtophys_iommu_fini(void) -{ - if (spdk_process_is_primary()) { - rte_dev_event_callback_unregister(NULL, vtophys_iommu_device_event, NULL); - rte_dev_event_monitor_stop(); - } -} - #endif void @@ -1420,6 +1287,35 @@ vtophys_pci_device_added(struct rte_pci_device *pci_device) DEBUG_PRINT("Memory allocation error\n"); } pthread_mutex_unlock(&g_vtophys_pci_devices_mutex); + +#if VFIO_ENABLED + struct spdk_vfio_dma_map *dma_map; + int ret; + + if (!g_vfio.enabled) { + return; + } + + pthread_mutex_lock(&g_vfio.mutex); + g_vfio.device_ref++; + if (g_vfio.device_ref > 1) { + pthread_mutex_unlock(&g_vfio.mutex); + return; + } + + /* This is the first SPDK device using DPDK vfio. This means that the first + * IOMMU group might have been just been added to the DPDK vfio container. + * From this point it is certain that the memory can be mapped now. + */ + TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) { + ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map); + if (ret) { + DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno); + break; + } + } + pthread_mutex_unlock(&g_vfio.mutex); +#endif } void @@ -1436,6 +1332,43 @@ vtophys_pci_device_removed(struct rte_pci_device *pci_device) } } pthread_mutex_unlock(&g_vtophys_pci_devices_mutex); + +#if VFIO_ENABLED + struct spdk_vfio_dma_map *dma_map; + int ret; + + if (!g_vfio.enabled) { + return; + } + + pthread_mutex_lock(&g_vfio.mutex); + assert(g_vfio.device_ref > 0); + g_vfio.device_ref--; + if (g_vfio.device_ref > 0) { + pthread_mutex_unlock(&g_vfio.mutex); + return; + } + + /* This is the last SPDK device using DPDK vfio. If DPDK doesn't have + * any additional devices using it's vfio container, all the mappings + * will be automatically removed by the Linux vfio driver. We unmap + * the memory manually to be able to easily re-map it later regardless + * of other, external factors. + */ + TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) { + struct vfio_iommu_type1_dma_unmap unmap = {}; + unmap.argsz = sizeof(unmap); + unmap.flags = 0; + unmap.iova = dma_map->map.iova; + unmap.size = dma_map->map.size; + ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap); + if (ret) { + DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno); + break; + } + } + pthread_mutex_unlock(&g_vfio.mutex); +#endif } int @@ -1470,14 +1403,6 @@ vtophys_init(void) return 0; } -void -vtophys_fini(void) -{ -#if VFIO_ENABLED - vtophys_iommu_fini(); -#endif -} - uint64_t spdk_vtophys(const void *buf, uint64_t *size) { diff --git a/test/env/memory/memory_ut.c b/test/env/memory/memory_ut.c index 696cc7c1a..e0583375f 100644 --- a/test/env/memory/memory_ut.c +++ b/test/env/memory/memory_ut.c @@ -57,14 +57,6 @@ DEFINE_STUB(rte_vfio_noiommu_is_enabled, int, (void), 0); DEFINE_STUB(rte_memseg_get_fd_thread_unsafe, int, (const struct rte_memseg *ms), 0); DEFINE_STUB(rte_memseg_get_fd_offset_thread_unsafe, int, (const struct rte_memseg *ms, size_t *offset), 0); -DEFINE_STUB(rte_dev_iterator_init, int, (struct rte_dev_iterator *it, const char *dev_str), 0); -DEFINE_STUB(rte_dev_iterator_next, struct rte_device *, (struct rte_dev_iterator *it), NULL); -DEFINE_STUB(rte_dev_event_callback_register, int, (const char *device_name, - rte_dev_event_cb_fn cb_fn, void *cb_arg), 0); -DEFINE_STUB(rte_dev_event_callback_unregister, int, (const char *device_name, - rte_dev_event_cb_fn cb_fn, void *cb_arg), 0); -DEFINE_STUB(rte_dev_event_monitor_start, int, (void), 0); -DEFINE_STUB(rte_dev_event_monitor_stop, int, (void), 0); static int test_mem_map_notify(void *cb_ctx, struct spdk_mem_map *map,