From f43a485299e550278b972db36793815a3c3c60e4 Mon Sep 17 00:00:00 2001 From: Darek Stojaczyk Date: Wed, 3 Jul 2019 05:52:53 +0000 Subject: [PATCH] env_dpdk/pci: make spdk_pci_device_detach() synchronous again By making dpdk device detach asynchronous we have actually broken some cases where devices are re-attached immediately after and fail since they were not detached yet, so now we're making device detach synchronous again. For that we'll simply wait inside spdk_pci_device_detach() for the background dpdk thread to perform all necessary actions before we return. We'll also print an error msg if DPDK failed the detach (probably because of some internal error). Change-Id: I7657ac1b169169eae3325de2d28c2cc311e7d901 Signed-off-by: Darek Stojaczyk Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/460286 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Shuhei Matsumoto Reviewed-by: --- lib/env_dpdk/pci.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/env_dpdk/pci.c b/lib/env_dpdk/pci.c index d8459d485..b4af0d18e 100644 --- a/lib/env_dpdk/pci.c +++ b/lib/env_dpdk/pci.c @@ -123,14 +123,49 @@ spdk_detach_rte_cb(void *_dev) static void spdk_detach_rte(struct spdk_pci_device *dev) { + struct rte_pci_device *rte_dev = dev->dev_handle; + int i; + bool removed; + /* The device was already marked as available and could be attached * again while we go asynchronous, so we explicitly forbid that. 
*/ dev->internal.pending_removal = true; if (spdk_process_is_primary() && !pthread_equal(g_dpdk_tid, pthread_self())) { - rte_eal_alarm_set(10, spdk_detach_rte_cb, dev->dev_handle); + rte_eal_alarm_set(1, spdk_detach_rte_cb, rte_dev); + /* wait up to 20ms for the cb to start executing */ + for (i = 20; i > 0; i--) { + + spdk_delay_us(1000); + pthread_mutex_lock(&g_pci_mutex); + removed = dev->internal.removed; + pthread_mutex_unlock(&g_pci_mutex); + + if (removed) { + break; + } + } + + /* besides checking the removed flag, we also need to wait + * for the dpdk detach function to unwind, as it's doing some + * operations even after calling our detach callback. Simply + * cancel the alarm - if it started executing already, this + * call will block and wait for it to finish. + */ + rte_eal_alarm_cancel(spdk_detach_rte_cb, rte_dev); + + /* the device could have been finally removed, so just check + * it again. + */ + pthread_mutex_lock(&g_pci_mutex); + removed = dev->internal.removed; + pthread_mutex_unlock(&g_pci_mutex); + if (!removed) { + fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n", + rte_dev->name); + } } else { - spdk_detach_rte_cb(dev->dev_handle); + spdk_detach_rte_cb(rte_dev); } }