diff --git a/CHANGELOG.md b/CHANGELOG.md index f32e4e542..552c11654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ performance. New API spdk_nvme_ctrlr_get_flags() was added. +NVMe hotplug poller is now able to detach devices hotremoved from the system +via `/sys/bus/pci/devices//remove` and `/sys/bus/pci/devices//driver/unbind`. + ### raid Added new strip_size_kb rpc param on create to replace the more ambiguous @@ -60,6 +63,9 @@ Default size is 4096. The `phys_addr` parameter in spdk_malloc() and spdk_zmalloc() has been deprecated. For retrieving physical addresses, spdk_vtophys() should be used instead. +spdk_pci_device_is_removed() has been added to let the upper-layer SPDK drivers know +that device has a pending external hotremove request. + ### DPDK Dropped support for DPDK 17.07 and earlier, which SPDK won't even compile with right now. diff --git a/include/spdk/env.h b/include/spdk/env.h index 534983d78..8273f1e32 100644 --- a/include/spdk/env.h +++ b/include/spdk/env.h @@ -653,6 +653,7 @@ struct spdk_pci_device { struct _spdk_pci_device_internal { struct spdk_pci_driver *driver; bool attached; + bool pending_removal; TAILQ_ENTRY(spdk_pci_device) tailq; } internal; }; @@ -977,6 +978,19 @@ int spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uin */ int spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset); +/** + * Check if device was requested to be removed from the process. This can be + * caused either by physical device hotremoval or OS-triggered removal. In the + * latter case, the device may continue to function properly even if this + * function returns \c true . The upper-layer driver may check this function + * periodically and eventually detach the device. + * + * \param dev PCI device. + * + * \return if device was requested to be removed + */ +bool spdk_pci_device_is_removed(struct spdk_pci_device *dev); + /** * Compare two PCI addresses. * diff --git a/lib/env_dpdk/pci.c b/lib/env_dpdk/pci.c index 657d95ecd..124735ae2 100644 --- a/lib/env_dpdk/pci.c +++ b/lib/env_dpdk/pci.c @@ -120,6 +120,42 @@ spdk_pci_driver_register(struct spdk_pci_driver *driver) TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); } +#if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0) +static void +spdk_pci_device_rte_hotremove(const char *device_name, + enum rte_dev_event_type event, + void *cb_arg) +{ + struct spdk_pci_device *dev; + + if (event != RTE_DEV_EVENT_REMOVE) { + return; + } + + pthread_mutex_lock(&g_pci_mutex); + TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { + struct rte_pci_device *rte_dev = dev->dev_handle; + + if (strcmp(rte_dev->name, device_name) == 0) { + if (!dev->internal.pending_removal && + !dev->internal.attached) { + /* if device is not attached, we + * can remove it right away. + */ + spdk_detach_rte(dev); + } else { + /* otherwise we let the upper layers + * detach it first. + */ + dev->internal.pending_removal = true; + } + break; + } + } + pthread_mutex_unlock(&g_pci_mutex); +} +#endif + void spdk_pci_init(void) { @@ -145,6 +181,13 @@ spdk_pci_init(void) rte_pci_register(&driver->driver); } #endif + +#if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0) + /* Register a single hotremove callback for all devices. */ + if (spdk_process_is_primary()) { + rte_dev_event_callback_register(NULL, spdk_pci_device_rte_hotremove, NULL); + } +#endif } void @@ -159,6 +202,12 @@ spdk_pci_fini(void) fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf); } } + +#if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0) + if (spdk_process_is_primary()) { + rte_dev_event_callback_unregister(NULL, spdk_pci_device_rte_hotremove, NULL); + } +#endif } int @@ -535,6 +584,12 @@ spdk_pci_device_get_addr(struct spdk_pci_device *dev) return dev->addr; } +bool +spdk_pci_device_is_removed(struct spdk_pci_device *dev) +{ + return dev->internal.pending_removal; +} + int spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) { diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c index 21459716a..97ec78631 100644 --- a/lib/nvme/nvme_pcie.c +++ b/lib/nvme/nvme_pcie.c @@ -250,6 +250,13 @@ nvme_pcie_ctrlr_setup_signal(void) sigaction(SIGBUS, &sa, NULL); } +static inline struct nvme_pcie_ctrlr * +nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE); + return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr); +} + static int _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) { @@ -298,30 +305,37 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) /* This is a work around for vfio-attached device hot remove detection. */ TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) { - /* NVMe controller BAR must be mapped to secondary process space before any access. */ + bool do_remove = false; + + if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + + if (spdk_pci_device_is_removed(pctrlr->devhandle)) { + do_remove = true; + } + } + + /* NVMe controller BAR must be mapped in the current process before any access. */ proc = spdk_nvme_ctrlr_get_current_process(ctrlr); if (proc) { csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); if (csts.raw == 0xffffffffU) { - nvme_ctrlr_fail(ctrlr, true); - if (probe_ctx->remove_cb) { - nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); - probe_ctx->remove_cb(probe_ctx->cb_ctx, ctrlr); - nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); - } + do_remove = true; + } + } + + if (do_remove) { + nvme_ctrlr_fail(ctrlr, true); + if (probe_ctx->remove_cb) { + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + probe_ctx->remove_cb(probe_ctx->cb_ctx, ctrlr); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); } } } return 0; } -static inline struct nvme_pcie_ctrlr * -nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr) -{ - assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE); - return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr); -} - static inline struct nvme_pcie_qpair * nvme_pcie_qpair(struct spdk_nvme_qpair *qpair) {