From 7c6f0ef0019efb9a60b64fd1e191246174680eaf Mon Sep 17 00:00:00 2001 From: Darek Stojaczyk Date: Tue, 7 Apr 2020 13:57:43 +0200 Subject: [PATCH] env_dpdk/pci: fix segfault on simultaneous VFIO hotremove and user detach There was a chance we scheduled a device removal to the DPDK thread while that thread was already removing the device from a VFIO hotremove notification (on the DPDK interrupt thread). The second hotremove attempt touches some freed memory and segfaults. The VFIO hotremove notification already checks pending_removal flag under a mutex and sets it to true, so do the same in spdk_detach_rte() (called from the SPDK init thread). Change-Id: Ib3f0eb7c0c5c6e1ab8cf253b7711fd149925a143 Signed-off-by: Darek Stojaczyk Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1730 Reviewed-by: Aleksey Marchuk Reviewed-by: Jim Harris Reviewed-by: Ben Walker Reviewed-by: Paul Luse Reviewed-by: Michael Haeuptle Tested-by: SPDK CI Jenkins Community-CI: Mellanox Build Bot --- lib/env_dpdk/pci.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/env_dpdk/pci.c b/lib/env_dpdk/pci.c index f90ef946f..1bce2010e 100644 --- a/lib/env_dpdk/pci.c +++ b/lib/env_dpdk/pci.c @@ -130,17 +130,19 @@ detach_rte(struct spdk_pci_device *dev) int i; bool removed; - /* The device was already marked as available and could be attached - * again while we go asynchronous, so we explicitly forbid that. - */ - dev->internal.pending_removal = true; if (!spdk_process_is_primary()) { remove_rte_dev(rte_dev); return; } + pthread_mutex_lock(&g_pci_mutex); + /* prevent the hotremove notification from removing this device */ + dev->internal.pending_removal = true; + pthread_mutex_unlock(&g_pci_mutex); + rte_eal_alarm_set(1, detach_rte_cb, rte_dev); - /* wait up to 2s for the cb to finish executing */ + + /* wait up to 2s for the cb to execute */ for (i = 2000; i > 0; i--) { spdk_delay_us(1000); @@ -209,7 +211,9 @@ pci_device_rte_hotremove(const char *device_name, pthread_mutex_unlock(&g_pci_mutex); if (dev != NULL && can_detach) { - /* if device is not attached we can remove it right away. */ + /* if device is not attached we can remove it right away. + * Otherwise it will be removed at detach. + */ remove_rte_dev(dev->dev_handle); } }