From 9a6fe60faa77efbff817a844d29199ffa49a1018 Mon Sep 17 00:00:00 2001 From: Darek Stojaczyk Date: Fri, 23 Nov 2018 15:36:18 +0100 Subject: [PATCH] pci: retry hotplugging DPDK device DPDK 18.11+ multi-process hotplug isn't robust. Multiple secondary processes starting at the same time might cause the internal IPC to misbehave. Just retry hotplugging/hotremoving the device in such a case. Change-Id: I1f830c2c0dbe1d63eca9a116101b3d202172b2ca Signed-off-by: Darek Stojaczyk Reviewed-on: https://review.gerrithub.io/434539 Tested-by: SPDK CI Jenkins Chandler-Test-Pool: SPDK Automated Test System Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- lib/env_dpdk/pci.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/lib/env_dpdk/pci.c b/lib/env_dpdk/pci.c index 5993f3f15..0f3d4c53a 100644 --- a/lib/env_dpdk/pci.c +++ b/lib/env_dpdk/pci.c @@ -40,6 +40,11 @@ #define PCI_CFG_SIZE 256 #define PCI_EXT_CAP_ID_SN 0x03 +/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time + * might cause the internal IPC to misbehave. Just retry in such case. 
+ */ +#define DPDK_HOTPLUG_RETRY_COUNT 4 + static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); @@ -160,7 +165,13 @@ spdk_pci_device_detach(struct spdk_pci_device *dev) assert(dev->attached); dev->attached = false; #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0) - rte_eal_hotplug_remove("pci", device->device.name); + char bdf[32]; + int i = 0, rc; + + snprintf(bdf, sizeof(bdf), "%s", device->device.name); + do { + rc = rte_eal_hotplug_remove("pci", bdf); + } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); #elif RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) rte_eal_dev_detach(&device->device); #elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) @@ -226,7 +237,18 @@ spdk_pci_device_attach(struct spdk_pci_driver *driver, driver->cb_arg = enum_ctx; #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0) - rc = rte_eal_hotplug_add("pci", bdf, ""); + int i = 0; + + do { + rc = rte_eal_hotplug_add("pci", bdf, ""); + } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); + + if (i > 1 && rc == -EEXIST) { + /* Even though the previous request timed out, the device + * was attached successfully. + */ + rc = 0; + } #elif RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) rc = rte_eal_dev_attach(bdf, ""); #elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)