ENV_DPDK/VFIO: Increase PCI tear down timeout
When removing a large number of devices (>8) in parallel,
the 20ms timeout is not long enough.
As part of spdk_detach_cb, DPDK calls into the VFIO driver,
which may get delayed due to multiple hot removes being
processed by the pciehp driver (the pciehp IRQ thread function
handles the actual removal of a device in parallel, but
all of the IRQ thread functions compete for a global mutex,
increasing processing time and race conditions).
Signed-off-by: Michael Haeuptle <michael.haeuptle@hpe.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1588 (master)
(cherry picked from commit 55df83ceb6)
Change-Id: I470fbbee92dac9677082c873781efe41e2941cd5
Signed-off-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/2598
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Michael Haeuptle <michaelhaeuptle@gmail.com>
This commit is contained in:
parent
fab97f2aac
commit
9bcc0ea8e8
@ -133,8 +133,8 @@ spdk_detach_rte(struct spdk_pci_device *dev)
|
||||
dev->internal.pending_removal = true;
|
||||
if (spdk_process_is_primary() && !pthread_equal(g_dpdk_tid, pthread_self())) {
|
||||
rte_eal_alarm_set(1, spdk_detach_rte_cb, rte_dev);
|
||||
/* wait up to 20ms for the cb to start executing */
|
||||
for (i = 20; i > 0; i--) {
|
||||
/* wait up to 2s for the cb to finish executing */
|
||||
for (i = 2000; i > 0; i--) {
|
||||
|
||||
spdk_delay_us(1000);
|
||||
pthread_mutex_lock(&g_pci_mutex);
|
||||
@ -149,7 +149,7 @@ spdk_detach_rte(struct spdk_pci_device *dev)
|
||||
/* besides checking the removed flag, we also need to wait
|
||||
* for the dpdk detach function to unwind, as it's doing some
|
||||
* operations even after calling our detach callback. Simply
|
||||
* cancell the alarm - if it started executing already, this
|
||||
* cancel the alarm - if it started executing already, this
|
||||
* call will block and wait for it to finish.
|
||||
*/
|
||||
rte_eal_alarm_cancel(spdk_detach_rte_cb, rte_dev);
|
||||
@ -163,6 +163,8 @@ spdk_detach_rte(struct spdk_pci_device *dev)
|
||||
if (!removed) {
|
||||
fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n",
|
||||
rte_dev->name);
|
||||
/* If we reach this state, then the device couldn't be removed and most likely
|
||||
a subsequent hot add of a device in the same BDF will fail */
|
||||
}
|
||||
} else {
|
||||
spdk_detach_rte_cb(rte_dev);
|
||||
|
Loading…
Reference in New Issue
Block a user