nvme/pcie: don't initiate hotremoval on SIGBUS for VFIO devices
This is an attempt to work around a kernel data race spotted in #1275. We have to delay our VFIO interaction until after the kernel handles the PCIe hotremoval. Otherwise we risk having a deadlock inside the kernel. We could put a dummy delay in the SPDK hotremove routine, but we don't know exactly how long we have to wait, so wait for the VFIO hotremove notification to arrive instead. From what I understand, it is always sent after the kernel has already released the critical locks. This patch breaks hotremove for VFIO devices when running with DPDK versions < 18.05, but those have already reached EOL and their support is removed from SPDK later within this series. It would be nice to wait for this VFIO notification inside lib/env_dpdk, e.g. inside the detach function, but we don't know there why the detach is happening. It could be triggered by a user RPC, not physical device hotremoval. Change-Id: I2727de923bb2e3e05a83202465330a32a77d7cfc Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1732 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
f280ce59b7
commit
64c4fafd32
@ -269,8 +269,6 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
|
|||||||
struct spdk_nvme_ctrlr *ctrlr, *tmp;
|
struct spdk_nvme_ctrlr *ctrlr, *tmp;
|
||||||
struct spdk_uevent event;
|
struct spdk_uevent event;
|
||||||
struct spdk_pci_addr pci_addr;
|
struct spdk_pci_addr pci_addr;
|
||||||
union spdk_nvme_csts_register csts;
|
|
||||||
struct spdk_nvme_ctrlr_process *proc;
|
|
||||||
|
|
||||||
if (g_spdk_nvme_driver->hotplug_fd < 0) {
|
if (g_spdk_nvme_driver->hotplug_fd < 0) {
|
||||||
return 0;
|
return 0;
|
||||||
@ -313,25 +311,20 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This is a work around for vfio-attached device hot remove detection. */
|
/* Initiate removal of physically hotremoved PCI controllers. Even after
|
||||||
|
* they're hotremoved from the system, SPDK might still report them via RPC.
|
||||||
|
*/
|
||||||
TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) {
|
TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) {
|
||||||
bool do_remove = false;
|
bool do_remove = false;
|
||||||
|
struct nvme_pcie_ctrlr *pctrlr;
|
||||||
|
|
||||||
if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
|
if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
|
||||||
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
|
continue;
|
||||||
|
|
||||||
if (spdk_pci_device_is_removed(pctrlr->devhandle)) {
|
|
||||||
do_remove = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* NVMe controller BAR must be mapped in the current process before any access. */
|
pctrlr = nvme_pcie_ctrlr(ctrlr);
|
||||||
proc = nvme_ctrlr_get_current_process(ctrlr);
|
if (spdk_pci_device_is_removed(pctrlr->devhandle)) {
|
||||||
if (proc) {
|
do_remove = true;
|
||||||
csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
|
|
||||||
if (csts.raw == 0xffffffffU) {
|
|
||||||
do_remove = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (do_remove) {
|
if (do_remove) {
|
||||||
|
Loading…
Reference in New Issue
Block a user