pci: introduce pci hooks

Devices behind a VMD aren't visible directly on the PCI
bus. In order to support them, we'll need an additional
VMD driver that's going to enumerate the devices behind
it and hook those into the SPDK PCI layer.

We want those devices to be accessible with the same APIs
that are used to access physical PCI devices.

The physical devices are still created and managed by
DPDK, but additional devices can now be hooked externally.

The hook API slightly departs from how the env layer has
worked so far. Instead of keeping the generic hook functions
internal-only and adding per-driver (NVMe, I/OAT, Virtio)
public functions, this patch makes the generic hook API
public from the start. It accepts the device driver as
a parameter, which needs to be exposed now. That's why
spdk_pci_nvme_get_driver() is introduced. It's only the
NVMe driver that's exposed so far, but other drivers and
their attach APIs should eventually follow the same path.
The previous model really didn't scale well and there's
no need to stretch it further.

Change-Id: Iade018a43b1e23527bd2914be42b403551e73bb6
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/435802
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Darek Stojaczyk 2018-11-29 10:24:05 +01:00 committed by Jim Harris
parent 8d28807cd0
commit 2f36777194
3 changed files with 150 additions and 54 deletions

View File

@ -600,6 +600,16 @@ struct spdk_pci_device {
struct spdk_pci_id id;
int socket_id;
int (*map_bar)(struct spdk_pci_device *dev, uint32_t bar,
void **mapped_addr, uint64_t *phys_addr, uint64_t *size);
int (*unmap_bar)(struct spdk_pci_device *dev, uint32_t bar,
void *addr);
int (*cfg_read)(struct spdk_pci_device *dev, void *value,
uint32_t len, uint32_t offset);
int (*cfg_write)(struct spdk_pci_device *dev, void *value,
uint32_t len, uint32_t offset);
void (*detach)(struct spdk_pci_device *dev);
struct _spdk_pci_device_internal {
struct spdk_pci_driver *driver;
bool attached;
@ -609,6 +619,13 @@ struct spdk_pci_device {
/**
 * Callback type that receives an opaque caller-supplied context and a PCI device.
 * NOTE(review): the meaning of the return value is not visible here - confirm
 * against spdk_pci_enumerate().
 */
typedef int (*spdk_pci_enum_cb)(void *enum_ctx, struct spdk_pci_device *pci_dev);
/**
 * Get the NVMe PCI driver object.
 *
 * The returned driver can be passed to spdk_pci_hook_device() as the driver
 * that will be able to attach an externally hooked device.
 *
 * \return PCI driver.
 */
struct spdk_pci_driver *spdk_pci_nvme_get_driver(void);
/**
* Enumerate all NVMe devices on the PCI bus and try to attach those that
* weren't attached yet. The provided callback will be called for each such
@ -968,6 +985,23 @@ int spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf);
*/
int spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr);
/**
 * Hook a custom PCI device into the PCI layer. The device will be attachable,
 * enumerable, and will call provided callbacks on each PCI resource access
 * request.
 *
 * The map_bar, unmap_bar, cfg_read, cfg_write and detach callbacks of the
 * device must all be set; the implementation asserts that none of them is NULL.
 *
 * \param drv driver that will be able to attach the device
 * \param dev fully initialized PCI device struct
 */
void spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev);
/**
 * Un-hook a custom PCI device from the PCI layer. The device must not be attached.
 *
 * \param dev device that was previously hooked with spdk_pci_hook_device()
 */
void spdk_pci_unhook_device(struct spdk_pci_device *dev);
/**
* Remove any CPU affinity from the current thread.
*/

View File

@ -49,6 +49,84 @@ static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Devices known to the PCI layer. spdk_pci_hook_device() appends to this list
 * and spdk_pci_unhook_device() removes from it.
 */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* List of PCI drivers; presumably filled by spdk_pci_driver_register() - TODO confirm. */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
/*
 * map_bar callback used for devices whose dev_handle is a DPDK rte_pci_device.
 *
 * Reports the address, physical address and length recorded in the DPDK
 * device's mem_resource entry for the requested BAR. Always returns 0.
 */
static int
spdk_map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
		 void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct rte_pci_device *rte_dev = device->dev_handle;

	/* The values are simply copied out of the DPDK resource table. */
	*mapped_addr = rte_dev->mem_resource[bar].addr;
	*phys_addr = (uint64_t)rte_dev->mem_resource[bar].phys_addr;
	*size = (uint64_t)rte_dev->mem_resource[bar].len;

	return 0;
}
/*
 * unmap_bar callback used for DPDK-backed devices. It performs no work and
 * always returns 0.
 */
static int
spdk_unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	(void)device;
	(void)bar;
	(void)addr;

	return 0;
}
/*
 * cfg_read callback used for DPDK-backed devices.
 *
 * Forwards the request to the DPDK config-space read routine that matches the
 * DPDK version in use. On FreeBSD with DPDK older than 18.11 the DPDK result
 * is returned unchanged; otherwise the call is treated as successful
 * (return 0) only when the DPDK result is positive and equal to len, and -1
 * is returned in every other case.
 */
static int
spdk_cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int ret;

#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
	ret = rte_pci_read_config(dev->dev_handle, value, len, offset);
#else
	ret = rte_eal_pci_read_config(dev->dev_handle, value, len, offset);
#endif

#if defined(__FreeBSD__) && RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
	/* Older DPDKs return 0 on success and -1 on failure */
	return ret;
#else
	if (ret > 0 && (uint32_t) ret == len) {
		return 0;
	}

	return -1;
#endif
}
/*
 * cfg_write callback used for DPDK-backed devices.
 *
 * Forwards the request to the DPDK config-space write routine that matches
 * the DPDK version in use. On FreeBSD the DPDK result is returned unchanged;
 * elsewhere the call is treated as successful (return 0) only when the DPDK
 * result is positive and equal to len, and -1 is returned in every other case.
 */
static int
spdk_cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int ret;

#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
	ret = rte_pci_write_config(dev->dev_handle, value, len, offset);
#else
	ret = rte_eal_pci_write_config(dev->dev_handle, value, len, offset);
#endif

#ifdef __FreeBSD__
	/* DPDK returns 0 on success and -1 on failure */
	return ret;
#else
	if (ret > 0 && (uint32_t) ret == len) {
		return 0;
	}

	return -1;
#endif
}
/*
 * detach callback used for DPDK-backed devices.
 *
 * Asks DPDK to remove the device, using whichever removal API the DPDK
 * version in use provides. With DPDK 18.11 or newer the hotplug removal is
 * repeated while it keeps returning -ENOMSG, for at most
 * DPDK_HOTPLUG_RETRY_COUNT additional attempts after the first one.
 */
static void
spdk_detach_rte(struct spdk_pci_device *dev)
{
	struct rte_pci_device *pci = dev->dev_handle;

#if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
	char name[32];
	int attempt;

	snprintf(name, sizeof(name), "%s", pci->device.name);
	for (attempt = 0; attempt <= DPDK_HOTPLUG_RETRY_COUNT; attempt++) {
		/* Any result other than -ENOMSG ends the retry loop. */
		if (rte_eal_hotplug_remove("pci", name) != -ENOMSG) {
			break;
		}
	}
#elif RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3)
	rte_eal_dev_detach(&pci->device);
#elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
	rte_pci_detach(&pci->addr);
#else
	rte_eal_device_remove(&pci->device);
	rte_eal_pci_detach(&pci->addr);
#endif
}
void
spdk_pci_driver_register(struct spdk_pci_driver *driver)
{
@ -119,6 +197,12 @@ spdk_pci_device_init(struct rte_pci_driver *_drv,
dev->id.subdevice_id = _dev->id.subsystem_device_id;
dev->socket_id = _dev->device.numa_node;
dev->map_bar = spdk_map_bar_rte;
dev->unmap_bar = spdk_unmap_bar_rte;
dev->cfg_read = spdk_cfg_read_rte;
dev->cfg_write = spdk_cfg_write_rte;
dev->detach = spdk_detach_rte;
dev->internal.driver = driver;
if (driver->cb_fn != NULL) {
@ -161,26 +245,9 @@ spdk_pci_device_fini(struct rte_pci_device *_dev)
/*
 * Detach a device that is currently attached.
 *
 * Clears the device's attached flag and then hands the actual teardown to the
 * device's own detach callback, so that DPDK-backed and externally hooked
 * devices are both handled by the code that owns them. No DPDK call is made
 * here directly: doing so would detach DPDK devices twice and would
 * misinterpret the dev_handle of hooked devices.
 */
void
spdk_pci_device_detach(struct spdk_pci_device *dev)
{
	assert(dev->internal.attached);
	dev->internal.attached = false;
	dev->detach(dev);
}
int
@ -326,22 +393,16 @@ spdk_pci_enumerate(struct spdk_pci_driver *driver,
}
int
spdk_pci_device_map_bar(struct spdk_pci_device *device, uint32_t bar,
spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
struct rte_pci_device *dev = device->dev_handle;
*mapped_addr = dev->mem_resource[bar].addr;
*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
*size = (uint64_t)dev->mem_resource[bar].len;
return 0;
return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
}
int
spdk_pci_device_unmap_bar(struct spdk_pci_device *device, uint32_t bar, void *addr)
spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
{
return 0;
return dev->unmap_bar(dev, bar, addr);
}
uint32_t
@ -407,37 +468,13 @@ spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
/*
 * Read len bytes from the device's PCI config space at the given offset into
 * value.
 *
 * The request is forwarded to the device's cfg_read callback and the
 * callback's return value is returned unchanged. No DPDK routine is called
 * here directly, because dev_handle of an externally hooked device is not
 * necessarily a DPDK device.
 */
int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}
/*
 * Write len bytes from value into the device's PCI config space at the given
 * offset.
 *
 * The request is forwarded to the device's cfg_write callback and the
 * callback's return value is returned unchanged. No DPDK routine is called
 * here directly, because dev_handle of an externally hooked device is not
 * necessarily a DPDK device.
 */
int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}
int
@ -667,3 +704,22 @@ spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
return -1;
}
/*
 * Hook an externally created device into the PCI layer.
 *
 * Records drv as the device's driver and appends the device to the global
 * device list. All five resource callbacks must already be set by the caller.
 * NOTE(review): the list is modified here without taking g_pci_mutex -
 * confirm that callers serialize access.
 */
void
spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
{
/* The generic spdk_pci_device_* wrappers call these unconditionally. */
assert(dev->map_bar != NULL);
assert(dev->unmap_bar != NULL);
assert(dev->cfg_read != NULL);
assert(dev->cfg_write != NULL);
assert(dev->detach != NULL);
dev->internal.driver = drv;
TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
}
/*
 * Remove a device from the global device list.
 *
 * The device must not be attached when this is called; this is only checked
 * by an assert.
 */
void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
assert(!dev->internal.attached);
TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

View File

@ -77,4 +77,10 @@ spdk_pci_nvme_enumerate(spdk_pci_enum_cb enum_cb, void *enum_ctx)
return spdk_pci_enumerate(&g_nvme_pci_drv, enum_cb, enum_ctx);
}
/*
 * Return a pointer to the NVMe PCI driver object defined in this file.
 */
struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	struct spdk_pci_driver *nvme_drv = &g_nvme_pci_drv;

	return nvme_drv;
}
SPDK_PMD_REGISTER_PCI(g_nvme_pci_drv);