env_dpdk/pci: fix segfault on simultaneous VFIO hotremove and user detach
There was a chance that we scheduled a device removal on the DPDK thread while that thread was already removing the device in response to a VFIO hotremove notification (on the DPDK interrupt thread). The second hotremove attempt then touches already-freed memory and segfaults. The VFIO hotremove notification already checks the pending_removal flag under a mutex and sets it to true, so do the same in spdk_detach_rte() (called from the SPDK init thread). Change-Id: Ib3f0eb7c0c5c6e1ab8cf253b7711fd149925a143 Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1730 Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Paul Luse <paul.e.luse@intel.com> Reviewed-by: Michael Haeuptle <michaelhaeuptle@gmail.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Mellanox Build Bot
This commit is contained in:
parent
d3bcd1ca5b
commit
7c6f0ef001
@ -130,17 +130,19 @@ detach_rte(struct spdk_pci_device *dev)
|
||||
int i;
|
||||
bool removed;
|
||||
|
||||
/* The device was already marked as available and could be attached
|
||||
* again while we go asynchronous, so we explicitly forbid that.
|
||||
*/
|
||||
dev->internal.pending_removal = true;
|
||||
if (!spdk_process_is_primary()) {
|
||||
remove_rte_dev(rte_dev);
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&g_pci_mutex);
|
||||
/* prevent the hotremove notification from removing this device */
|
||||
dev->internal.pending_removal = true;
|
||||
pthread_mutex_unlock(&g_pci_mutex);
|
||||
|
||||
rte_eal_alarm_set(1, detach_rte_cb, rte_dev);
|
||||
/* wait up to 2s for the cb to finish executing */
|
||||
|
||||
/* wait up to 2s for the cb to execute */
|
||||
for (i = 2000; i > 0; i--) {
|
||||
|
||||
spdk_delay_us(1000);
|
||||
@ -209,7 +211,9 @@ pci_device_rte_hotremove(const char *device_name,
|
||||
pthread_mutex_unlock(&g_pci_mutex);
|
||||
|
||||
if (dev != NULL && can_detach) {
|
||||
/* if device is not attached we can remove it right away. */
|
||||
/* if device is not attached we can remove it right away.
|
||||
* Otherwise it will be removed at detach.
|
||||
*/
|
||||
remove_rte_dev(dev->dev_handle);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user