env/dpdk: register VFIO hotremove callback
This is an attempt to fix device hotremove with VFIO. A soft device hotremove request through sysfs [1] would currently just block until the SPDK process manually releases that device - e.g. upon an RPC request. VFIO won't get unbound from the device untill userspace releases all its resources. VFIO can signal a pending hotremove request by kicking any file descriptor provided by the userspace - and DPDK does provide such descriptor - but SPDK does not listen on it. DPDK does offer handy API to listen and in this patch we make use of it inside our env/pci layer. Within a DPDK callback we set an internal per-device hotremove flag, which upper-layer SPDK drivers can poll with a new env API - spdk_pci_device_is_removed(). The VFIO hotremove event will be sent to primary processes only, so that's where we listen. We make use of this new API in the NVMe hotplug poller, which will process it just like any other supported hotremove event. Fixes #595 Fixes #690 [1] # echo 1 > /sys/bus/pci/devices/<bdf>/remove Change-Id: I03d88271c2089c740e232056d9340e5a640d442c Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/448927 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
468c6c18bd
commit
81523d9dd2
@ -25,6 +25,9 @@ performance.
|
||||
|
||||
New API spdk_nvme_ctrlr_get_flags() was added.
|
||||
|
||||
NVMe hotplug poller is now able to detach devices hotremoved from the system
|
||||
via `/sys/bus/pci/devices/<bdf>/remove` and `/sys/bus/pci/devices/<bdf>/driver/unbind`.
|
||||
|
||||
### raid
|
||||
|
||||
Added new strip_size_kb rpc param on create to replace the more ambiguous
|
||||
@ -60,6 +63,9 @@ Default size is 4096.
|
||||
The `phys_addr` parameter in spdk_malloc() and spdk_zmalloc() has been deprecated.
|
||||
For retrieving physical addresses, spdk_vtophys() should be used instead.
|
||||
|
||||
spdk_pci_device_is_removed() has been added to let the upper-layer SPDK drivers know
|
||||
that device has a pending external hotremove request.
|
||||
|
||||
### DPDK
|
||||
|
||||
Dropped support for DPDK 17.07 and earlier, which SPDK won't even compile with right now.
|
||||
|
@ -653,6 +653,7 @@ struct spdk_pci_device {
|
||||
struct _spdk_pci_device_internal {
|
||||
struct spdk_pci_driver *driver;
|
||||
bool attached;
|
||||
bool pending_removal;
|
||||
TAILQ_ENTRY(spdk_pci_device) tailq;
|
||||
} internal;
|
||||
};
|
||||
@ -977,6 +978,19 @@ int spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uin
|
||||
*/
|
||||
int spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset);
|
||||
|
||||
/**
|
||||
* Check if device was requested to be removed from the process. This can be
|
||||
* caused either by physical device hotremoval or OS-triggered removal. In the
|
||||
* latter case, the device may continue to function properly even if this
|
||||
* function returns \c true . The upper-layer driver may check this function
|
||||
* periodically and eventually detach the device.
|
||||
*
|
||||
* \param dev PCI device.
|
||||
*
|
||||
* \return if device was requested to be removed
|
||||
*/
|
||||
bool spdk_pci_device_is_removed(struct spdk_pci_device *dev);
|
||||
|
||||
/**
|
||||
* Compare two PCI addresses.
|
||||
*
|
||||
|
@ -120,6 +120,42 @@ spdk_pci_driver_register(struct spdk_pci_driver *driver)
|
||||
TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
|
||||
}
|
||||
|
||||
#if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
|
||||
static void
|
||||
spdk_pci_device_rte_hotremove(const char *device_name,
|
||||
enum rte_dev_event_type event,
|
||||
void *cb_arg)
|
||||
{
|
||||
struct spdk_pci_device *dev;
|
||||
|
||||
if (event != RTE_DEV_EVENT_REMOVE) {
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&g_pci_mutex);
|
||||
TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
|
||||
struct rte_pci_device *rte_dev = dev->dev_handle;
|
||||
|
||||
if (strcmp(rte_dev->name, device_name) == 0) {
|
||||
if (!dev->internal.pending_removal &&
|
||||
!dev->internal.attached) {
|
||||
/* if device is not attached, we
|
||||
* can remove it right away.
|
||||
*/
|
||||
spdk_detach_rte(dev);
|
||||
} else {
|
||||
/* otherwise we let the upper layers
|
||||
* detach it first.
|
||||
*/
|
||||
dev->internal.pending_removal = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_pci_mutex);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
spdk_pci_init(void)
|
||||
{
|
||||
@ -145,6 +181,13 @@ spdk_pci_init(void)
|
||||
rte_pci_register(&driver->driver);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
|
||||
/* Register a single hotremove callback for all devices. */
|
||||
if (spdk_process_is_primary()) {
|
||||
rte_dev_event_callback_register(NULL, spdk_pci_device_rte_hotremove, NULL);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@ -159,6 +202,12 @@ spdk_pci_fini(void)
|
||||
fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf);
|
||||
}
|
||||
}
|
||||
|
||||
#if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
|
||||
if (spdk_process_is_primary()) {
|
||||
rte_dev_event_callback_unregister(NULL, spdk_pci_device_rte_hotremove, NULL);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int
|
||||
@ -535,6 +584,12 @@ spdk_pci_device_get_addr(struct spdk_pci_device *dev)
|
||||
return dev->addr;
|
||||
}
|
||||
|
||||
bool
|
||||
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
|
||||
{
|
||||
return dev->internal.pending_removal;
|
||||
}
|
||||
|
||||
int
|
||||
spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
|
||||
{
|
||||
|
@ -250,6 +250,13 @@ nvme_pcie_ctrlr_setup_signal(void)
|
||||
sigaction(SIGBUS, &sa, NULL);
|
||||
}
|
||||
|
||||
static inline struct nvme_pcie_ctrlr *
|
||||
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
|
||||
{
|
||||
assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE);
|
||||
return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);
|
||||
}
|
||||
|
||||
static int
|
||||
_nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
|
||||
{
|
||||
@ -298,11 +305,26 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
|
||||
|
||||
/* This is a work around for vfio-attached device hot remove detection. */
|
||||
TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) {
|
||||
/* NVMe controller BAR must be mapped to secondary process space before any access. */
|
||||
bool do_remove = false;
|
||||
|
||||
if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
|
||||
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
|
||||
|
||||
if (spdk_pci_device_is_removed(pctrlr->devhandle)) {
|
||||
do_remove = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* NVMe controller BAR must be mapped in the current process before any access. */
|
||||
proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
|
||||
if (proc) {
|
||||
csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
|
||||
if (csts.raw == 0xffffffffU) {
|
||||
do_remove = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (do_remove) {
|
||||
nvme_ctrlr_fail(ctrlr, true);
|
||||
if (probe_ctx->remove_cb) {
|
||||
nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
|
||||
@ -311,17 +333,9 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct nvme_pcie_ctrlr *
|
||||
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
|
||||
{
|
||||
assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE);
|
||||
return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);
|
||||
}
|
||||
|
||||
static inline struct nvme_pcie_qpair *
|
||||
nvme_pcie_qpair(struct spdk_nvme_qpair *qpair)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user