diff --git a/app/spdk_tgt/Makefile b/app/spdk_tgt/Makefile index 9a84bbf79..eade65000 100644 --- a/app/spdk_tgt/Makefile +++ b/app/spdk_tgt/Makefile @@ -24,6 +24,9 @@ SPDK_LIB_LIST += event_nbd ifeq ($(CONFIG_VHOST),y) SPDK_LIB_LIST += event_vhost_blk event_vhost_scsi endif +ifeq ($(CONFIG_VFIO_USER),y) +SPDK_LIB_LIST += event_vfu_tgt +endif endif include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 1f1c302e5..8170e9e45 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -7973,6 +7973,43 @@ crdt1 | Optional | number | Command Retry Delay Time 1 crdt2 | Optional | number | Command Retry Delay Time 2 crdt3 | Optional | number | Command Retry Delay Time 3 +## Vfio-user Target + +### vfu_tgt_set_base_path {#rpc_vfu_tgt_set_base_path} + +Set base path of Unix Domain socket file. + +#### Parameters + +Name | Optional | Type | Description +----------------------- | -------- | ----------- | ----------- +path | Required | string | Base path + +#### Example + +Example request: + +~~~json +{ + "params": { + "path": "/var/run/vfu_tgt" + }, + "jsonrpc": "2.0", + "method": "vfu_tgt_set_base_path", + "id": 1 +} +~~~ + +Example response: + +~~~json +{ + "jsonrpc": "2.0", + "id": 1, + "result": true +} +~~~ + ## Vhost Target {#jsonrpc_components_vhost_tgt} The following common preconditions need to be met in all target types. diff --git a/include/spdk/vfu_target.h b/include/spdk/vfu_target.h new file mode 100644 index 000000000..5f257628d --- /dev/null +++ b/include/spdk/vfu_target.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ */ + +#ifndef _VFU_TARGET_H +#define _VFU_TARGET_H + +#include <vfio-user/libvfio-user.h> +#include <vfio-user/pci_defs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*spdk_vfu_init_cb)(int rc); +typedef void (*spdk_vfu_fini_cb)(void); + +void spdk_vfu_init(spdk_vfu_init_cb init_cb); +void spdk_vfu_fini(spdk_vfu_fini_cb fini_cb); + +struct spdk_vfu_endpoint; + +#define SPDK_VFU_MAX_NAME_LEN (64) + +struct spdk_vfu_sparse_mmap { + uint64_t offset; + uint64_t len; +}; + +#define SPDK_VFU_MAXIMUM_SPARSE_MMAP_REGIONS 8 + +typedef ssize_t (*spdk_vfu_access_cb)(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos, + bool is_write); + +struct spdk_vfu_pci_region { + uint64_t offset; + uint64_t len; + uint64_t flags; + uint32_t nr_sparse_mmaps; + int fd; + struct spdk_vfu_sparse_mmap mmaps[SPDK_VFU_MAXIMUM_SPARSE_MMAP_REGIONS]; + spdk_vfu_access_cb access_cb; +}; + +struct spdk_vfu_pci_device { + struct { + /* Vendor ID */ + uint16_t vid; + /* Device ID */ + uint16_t did; + /* Subsystem Vendor ID */ + uint16_t ssvid; + /* Subsystem ID */ + uint16_t ssid; + } id; + + struct { + /* Base Class Code */ + uint8_t bcc; + /* Sub Class code */ + uint8_t scc; + /* Programming Interface */ + uint8_t pi; + } class; + + /* Standard PCI Capabilities */ + struct pmcap pmcap; + struct pxcap pxcap; + struct msixcap msixcap; + uint16_t nr_vendor_caps; + + uint16_t intr_ipin; + uint32_t nr_int_irqs; + uint32_t nr_msix_irqs; + + struct spdk_vfu_pci_region regions[VFU_PCI_DEV_NUM_REGIONS]; +}; + +struct spdk_vfu_endpoint_ops { + /* PCI device type name */ + char name[SPDK_VFU_MAX_NAME_LEN]; + + void *(*init)(struct spdk_vfu_endpoint *endpoint, + char *basename, const char *endpoint_name); + int (*get_device_info)(struct spdk_vfu_endpoint *endpoint, + struct spdk_vfu_pci_device *device_info); + uint16_t (*get_vendor_capability)(struct spdk_vfu_endpoint *endpoint, char *buf, + uint16_t buf_len, uint16_t idx); + int (*attach_device)(struct spdk_vfu_endpoint *endpoint); + int (*detach_device)(struct spdk_vfu_endpoint 
*endpoint); + int (*destruct)(struct spdk_vfu_endpoint *endpoint); + + int (*post_memory_add)(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end); + int (*pre_memory_remove)(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end); + int (*reset_device)(struct spdk_vfu_endpoint *endpoint); + int (*quiesce_device)(struct spdk_vfu_endpoint *endpoint); +}; + +int spdk_vfu_register_endpoint_ops(struct spdk_vfu_endpoint_ops *ops); +int spdk_vfu_create_endpoint(const char *endpoint_name, const char *cpumask_str, + const char *dev_type_name); +int spdk_vfu_delete_endpoint(const char *endpoint_name); +int spdk_vfu_set_socket_path(const char *basename); +const char *spdk_vfu_get_endpoint_id(struct spdk_vfu_endpoint *endpoint); +const char *spdk_vfu_get_endpoint_name(struct spdk_vfu_endpoint *endpoint); +vfu_ctx_t *spdk_vfu_get_vfu_ctx(struct spdk_vfu_endpoint *endpoint); +void *spdk_vfu_get_endpoint_private(struct spdk_vfu_endpoint *endpoint); +bool spdk_vfu_endpoint_msix_enabled(struct spdk_vfu_endpoint *endpoint); +bool spdk_vfu_endpoint_intx_enabled(struct spdk_vfu_endpoint *endpoint); +void *spdk_vfu_endpoint_get_pci_config(struct spdk_vfu_endpoint *endpoint); +struct spdk_vfu_endpoint *spdk_vfu_get_endpoint_by_name(const char *name); +void *spdk_vfu_map_one(struct spdk_vfu_endpoint *endpoint, uint64_t addr, uint64_t len, + dma_sg_t *sg, struct iovec *iov, int prot); +void spdk_vfu_unmap_sg(struct spdk_vfu_endpoint *endpoint, dma_sg_t *sg, struct iovec *iov, + int iovcnt); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/Makefile b/lib/Makefile index 3d0054cc4..eba40f7eb 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,7 +20,7 @@ DIRS-$(CONFIG_VHOST) += vhost DIRS-$(CONFIG_VIRTIO) += virtio DIRS-$(CONFIG_REDUCE) += reduce DIRS-$(CONFIG_RDMA) += rdma -DIRS-$(CONFIG_VFIO_USER) += vfio_user +DIRS-$(CONFIG_VFIO_USER) += vfio_user vfu_tgt # If CONFIG_ENV is pointing at a directory in lib, build it. 
# Out-of-tree env implementations must be built separately by the user. diff --git a/lib/vfu_tgt/Makefile b/lib/vfu_tgt/Makefile new file mode 100644 index 000000000..95b6d07c6 --- /dev/null +++ b/lib/vfu_tgt/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) Intel Corporation. +# All rights reserved. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +SO_VER := 1 +SO_MINOR := 0 + +C_SRCS += tgt_endpoint.c tgt_rpc.c +CFLAGS += -I$(VFIO_USER_INCLUDE_DIR) +LDFLAGS += -L$(VFIO_USER_LIBRARY_DIR) +LOCAL_SYS_LIBS += -lvfio-user -ljson-c + +LIBNAME = vfu_tgt + +SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_vfu_tgt.map) + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/vfu_tgt/spdk_vfu_tgt.map b/lib/vfu_tgt/spdk_vfu_tgt.map new file mode 100644 index 000000000..28563b28d --- /dev/null +++ b/lib/vfu_tgt/spdk_vfu_tgt.map @@ -0,0 +1,23 @@ +{ + global: + + # public functions from vfu_target.h + spdk_vfu_init; + spdk_vfu_fini; + spdk_vfu_set_socket_path; + spdk_vfu_register_endpoint_ops; + spdk_vfu_create_endpoint; + spdk_vfu_delete_endpoint; + spdk_vfu_get_endpoint_id; + spdk_vfu_get_endpoint_name; + spdk_vfu_get_endpoint_by_name; + spdk_vfu_get_vfu_ctx; + spdk_vfu_get_endpoint_private; + spdk_vfu_endpoint_get_pci_config; + spdk_vfu_map_one; + spdk_vfu_unmap_sg; + spdk_vfu_endpoint_msix_enabled; + spdk_vfu_endpoint_intx_enabled; + + local: *; +}; diff --git a/lib/vfu_tgt/tgt_endpoint.c b/lib/vfu_tgt/tgt_endpoint.c new file mode 100644 index 000000000..f6b8bc2df --- /dev/null +++ b/lib/vfu_tgt/tgt_endpoint.c @@ -0,0 +1,787 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/util.h" +#include "spdk/memory.h" +#include "spdk/cpuset.h" +#include "spdk/likely.h" +#include "spdk/vfu_target.h" + +#include "tgt_internal.h" + +struct tgt_pci_device_ops { + struct spdk_vfu_endpoint_ops ops; + TAILQ_ENTRY(tgt_pci_device_ops) link; +}; + +static struct spdk_cpuset g_tgt_core_mask; +static pthread_mutex_t g_endpoint_lock = PTHREAD_MUTEX_INITIALIZER; +static TAILQ_HEAD(, spdk_vfu_endpoint) g_endpoint = TAILQ_HEAD_INITIALIZER(g_endpoint); +static TAILQ_HEAD(, tgt_pci_device_ops) g_pci_device_ops = TAILQ_HEAD_INITIALIZER(g_pci_device_ops); +static char g_endpoint_path_dirname[PATH_MAX] = ""; + +static struct spdk_vfu_endpoint_ops * +tgt_get_pci_device_ops(const char *device_type_name) +{ + struct tgt_pci_device_ops *pci_ops, *tmp; + bool exist = false; + + pthread_mutex_lock(&g_endpoint_lock); + TAILQ_FOREACH_SAFE(pci_ops, &g_pci_device_ops, link, tmp) { + if (!strncmp(device_type_name, pci_ops->ops.name, SPDK_VFU_MAX_NAME_LEN)) { + exist = true; + break; + } + } + pthread_mutex_unlock(&g_endpoint_lock); + + if (exist) { + return &pci_ops->ops; + } + return NULL; +} + +int +spdk_vfu_register_endpoint_ops(struct spdk_vfu_endpoint_ops *ops) +{ + struct tgt_pci_device_ops *pci_ops; + struct spdk_vfu_endpoint_ops *tmp; + + tmp = tgt_get_pci_device_ops(ops->name); + if (tmp) { + return -EEXIST; + } + + pci_ops = calloc(1, sizeof(*pci_ops)); + if (!pci_ops) { + return -ENOMEM; + } + pci_ops->ops = *ops; + + pthread_mutex_lock(&g_endpoint_lock); + TAILQ_INSERT_TAIL(&g_pci_device_ops, pci_ops, link); + pthread_mutex_unlock(&g_endpoint_lock); + + return 0; +} + +static char * +tgt_get_base_path(void) +{ + return g_endpoint_path_dirname; +} + +/* + * Set the directory under which endpoint socket files are created. + * A NULL or empty basename leaves the current setting untouched. + * The stored path always ends with '/'. + * Returns 0 on success or -EINVAL when the path does not fit. + */ +int +spdk_vfu_set_socket_path(const char *basename) +{ + size_t len; + + if (basename == NULL || basename[0] == '\0') { + return 0; + } + + /* Validate the length before writing so a rejected path cannot clobber the current base path */ + len = strlen(basename); + if (len >= sizeof(g_endpoint_path_dirname) - 2) { + SPDK_ERRLOG("Char dev dir path length %d is too long\n", (int)len); + return -EINVAL; + } + + /* Two bytes are kept in reserve for the trailing '/' and the terminator */ + snprintf(g_endpoint_path_dirname, sizeof(g_endpoint_path_dirname), "%s", basename); + if (g_endpoint_path_dirname[len - 1] != '/') { + g_endpoint_path_dirname[len] = '/'; + g_endpoint_path_dirname[len + 1] = '\0'; + } + + return 0; +} + 
+struct spdk_vfu_endpoint * +spdk_vfu_get_endpoint_by_name(const char *name) +{ + struct spdk_vfu_endpoint *endpoint, *tmp; + bool exist = false; + + pthread_mutex_lock(&g_endpoint_lock); + TAILQ_FOREACH_SAFE(endpoint, &g_endpoint, link, tmp) { + if (!strncmp(name, endpoint->name, SPDK_VFU_MAX_NAME_LEN)) { + exist = true; + break; + } + } + pthread_mutex_unlock(&g_endpoint_lock); + + if (exist) { + return endpoint; + } + return NULL; +} + +static int +tgt_vfu_ctx_poller(void *ctx) +{ + struct spdk_vfu_endpoint *endpoint = ctx; + vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx; + int ret; + + ret = vfu_run_ctx(vfu_ctx); + if (spdk_unlikely(ret == -1)) { + if (errno == EBUSY) { + return SPDK_POLLER_IDLE; + } + + if (errno == ENOTCONN) { + spdk_poller_unregister(&endpoint->vfu_ctx_poller); + if (endpoint->ops.detach_device) { + endpoint->ops.detach_device(endpoint); + } + endpoint->is_attached = false; + return SPDK_POLLER_BUSY; + } + } + + return ret != 0 ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; +} + +static int +tgt_accept_poller(void *ctx) +{ + struct spdk_vfu_endpoint *endpoint = ctx; + int ret; + + if (endpoint->is_attached) { + return SPDK_POLLER_IDLE; + } + + ret = vfu_attach_ctx(endpoint->vfu_ctx); + if (ret == 0) { + ret = endpoint->ops.attach_device(endpoint); + if (!ret) { + SPDK_NOTICELOG("%s: attached successfully\n", spdk_vfu_get_endpoint_id(endpoint)); + /* Polling socket too frequently will cause performance issue */ + endpoint->vfu_ctx_poller = SPDK_POLLER_REGISTER(tgt_vfu_ctx_poller, endpoint, 1000); + endpoint->is_attached = true; + } + return SPDK_POLLER_BUSY; + } + + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return SPDK_POLLER_IDLE; + } + + return SPDK_POLLER_BUSY; +} + +static void +tgt_log_cb(vfu_ctx_t *vfu_ctx, int level, char const *msg) +{ + struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx); + + if (level >= LOG_DEBUG) { + SPDK_DEBUGLOG(vfu, "%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg); + } else if (level >= LOG_INFO) { + SPDK_INFOLOG(vfu, "%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg); + } else if (level >= LOG_NOTICE) { + SPDK_NOTICELOG("%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg); + } else if (level >= LOG_WARNING) { + SPDK_WARNLOG("%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg); + } else { + SPDK_ERRLOG("%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg); + } +} + +static int +tgt_get_log_level(void) +{ + int level; + + if (SPDK_DEBUGLOG_FLAG_ENABLED("vfu")) { + return LOG_DEBUG; + } + + level = spdk_log_to_syslog_level(spdk_log_get_level()); + if (level < 0) { + return LOG_ERR; + } + + return level; +} + +static void +init_pci_config_space(vfu_pci_config_space_t *p, uint16_t ipin) +{ + /* MLBAR */ + p->hdr.bars[0].raw = 0x0; + /* MUBAR */ + p->hdr.bars[1].raw = 0x0; + + /* vendor specific, let's set them to zero for now */ + p->hdr.bars[3].raw = 0x0; + p->hdr.bars[4].raw = 0x0; + p->hdr.bars[5].raw = 0x0; + + /* enable INTx */ + 
p->hdr.intr.ipin = ipin; +} + +static void +tgt_memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) +{ + struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx); + void *map_start, *map_end; + int ret; + + if (!info->vaddr) { + return; + } + + map_start = info->mapping.iov_base; + map_end = info->mapping.iov_base + info->mapping.iov_len; + + if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || + (info->mapping.iov_len & MASK_2MB)) { + SPDK_DEBUGLOG(vfu, "Invalid memory region vaddr %p, IOVA %p-%p\n", + info->vaddr, map_start, map_end); + return; + } + + if (info->prot == (PROT_WRITE | PROT_READ)) { + ret = spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len); + if (ret) { + SPDK_ERRLOG("Memory region register %p-%p failed, ret=%d\n", + map_start, map_end, ret); + } + } + + if (endpoint->ops.post_memory_add) { + endpoint->ops.post_memory_add(endpoint, map_start, map_end); + } +} + +static void +tgt_memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) +{ + struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx); + void *map_start, *map_end; + int ret = 0; + + if (!info->vaddr) { + return; + } + + map_start = info->mapping.iov_base; + map_end = info->mapping.iov_base + info->mapping.iov_len; + + if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || + (info->mapping.iov_len & MASK_2MB)) { + SPDK_DEBUGLOG(vfu, "Invalid memory region vaddr %p, IOVA %p-%p\n", + info->vaddr, map_start, map_end); + return; + } + + if (endpoint->ops.pre_memory_remove) { + endpoint->ops.pre_memory_remove(endpoint, map_start, map_end); + } + + if (info->prot == (PROT_WRITE | PROT_READ)) { + ret = spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len); + if (ret) { + SPDK_ERRLOG("Memory region unregister %p-%p failed, ret=%d\n", + map_start, map_end, ret); + } + } +} + +static int +tgt_device_quiesce_cb(vfu_ctx_t *vfu_ctx) +{ + struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx); + int ret; + + 
assert(endpoint->ops.quiesce_device); + ret = endpoint->ops.quiesce_device(endpoint); + if (ret) { + errno = EBUSY; + ret = -1; + } + + return ret; +} + +static int +tgt_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type) +{ + struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx); + + SPDK_DEBUGLOG(vfu, "Device reset type %u\n", type); + + assert(endpoint->ops.reset_device); + return endpoint->ops.reset_device(endpoint); +} + +static int +tgt_endpoint_realize(struct spdk_vfu_endpoint *endpoint) +{ + int ret; + uint8_t buf[512]; + struct vsc *vendor_cap; + ssize_t cap_offset; + uint16_t vendor_cap_idx, cap_size, sparse_mmap_idx; + struct spdk_vfu_pci_device pci_dev; + uint8_t region_idx; + + assert(endpoint->ops.get_device_info); + ret = endpoint->ops.get_device_info(endpoint, &pci_dev); + if (ret) { + SPDK_ERRLOG("%s: failed to get pci device info\n", spdk_vfu_get_endpoint_id(endpoint)); + return ret; + } + + endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, endpoint->uuid, LIBVFIO_USER_FLAG_ATTACH_NB, + endpoint, VFU_DEV_TYPE_PCI); + if (endpoint->vfu_ctx == NULL) { + SPDK_ERRLOG("%s: error creating libvfio-user context\n", spdk_vfu_get_endpoint_id(endpoint)); + return -EFAULT; + } + vfu_setup_log(endpoint->vfu_ctx, tgt_log_cb, tgt_get_log_level()); + + ret = vfu_pci_init(endpoint->vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0); + if (ret < 0) { + SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", endpoint->vfu_ctx); + goto error; + } + + vfu_pci_set_id(endpoint->vfu_ctx, pci_dev.id.vid, pci_dev.id.did, pci_dev.id.ssvid, + pci_dev.id.ssid); + vfu_pci_set_class(endpoint->vfu_ctx, pci_dev.class.bcc, pci_dev.class.scc, pci_dev.class.pi); + + /* Add Vendor Capabilities */ + for (vendor_cap_idx = 0; vendor_cap_idx < pci_dev.nr_vendor_caps; vendor_cap_idx++) { + memset(buf, 0, sizeof(buf)); + cap_size = endpoint->ops.get_vendor_capability(endpoint, buf, 256, vendor_cap_idx); + if (cap_size) { + vendor_cap = (struct vsc *)buf; + 
assert(vendor_cap->hdr.id == PCI_CAP_ID_VNDR); + assert(vendor_cap->size == cap_size); + + cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, vendor_cap); + if (cap_offset < 0) { + SPDK_ERRLOG("vfu_ctx %p failed add vendor capability\n", endpoint->vfu_ctx); + ret = -EFAULT; + goto error; + } + } + } + + /* Add Standard PCI Capabilities */ + cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, &pci_dev.pmcap); + if (cap_offset < 0) { + SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", endpoint->vfu_ctx); + ret = -EFAULT; + goto error; + } + SPDK_DEBUGLOG(vfu, "%s PM cap_offset %ld\n", spdk_vfu_get_endpoint_id(endpoint), cap_offset); + + cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, &pci_dev.pxcap); + if (cap_offset < 0) { + SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", endpoint->vfu_ctx); + ret = -EFAULT; + goto error; + } + SPDK_DEBUGLOG(vfu, "%s PX cap_offset %ld\n", spdk_vfu_get_endpoint_id(endpoint), cap_offset); + + cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, &pci_dev.msixcap); + if (cap_offset < 0) { + SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", endpoint->vfu_ctx); + ret = -EFAULT; + goto error; + } + SPDK_DEBUGLOG(vfu, "%s MSIX cap_offset %ld\n", spdk_vfu_get_endpoint_id(endpoint), cap_offset); + + /* Setup PCI Regions */ + for (region_idx = 0; region_idx < VFU_PCI_DEV_NUM_REGIONS; region_idx++) { + struct spdk_vfu_pci_region *region = &pci_dev.regions[region_idx]; + struct iovec sparse_mmap[SPDK_VFU_MAXIMUM_SPARSE_MMAP_REGIONS]; + if (!region->len) { + continue; + } + + if (region->nr_sparse_mmaps) { + assert(region->nr_sparse_mmaps <= SPDK_VFU_MAXIMUM_SPARSE_MMAP_REGIONS); + for (sparse_mmap_idx = 0; sparse_mmap_idx < region->nr_sparse_mmaps; sparse_mmap_idx++) { + sparse_mmap[sparse_mmap_idx].iov_base = (void *)region->mmaps[sparse_mmap_idx].offset; + sparse_mmap[sparse_mmap_idx].iov_len = region->mmaps[sparse_mmap_idx].len; + } + } + + ret = vfu_setup_region(endpoint->vfu_ctx, region_idx, region->len, 
region->access_cb, region->flags, + region->nr_sparse_mmaps ? sparse_mmap : NULL, region->nr_sparse_mmaps, + region->fd, region->offset); + if (ret) { + SPDK_ERRLOG("vfu_ctx %p failed to setup region %u\n", endpoint->vfu_ctx, region_idx); + goto error; + } + SPDK_DEBUGLOG(vfu, "%s: region %u, len 0x%"PRIx64", callback %p, nr sparse mmaps %u, fd %d\n", + spdk_vfu_get_endpoint_id(endpoint), region_idx, region->len, region->access_cb, + region->nr_sparse_mmaps, region->fd); + } + + ret = vfu_setup_device_dma(endpoint->vfu_ctx, tgt_memory_region_add_cb, + tgt_memory_region_remove_cb); + if (ret < 0) { + SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", endpoint->vfu_ctx); + goto error; + } + + if (endpoint->ops.reset_device) { + ret = vfu_setup_device_reset_cb(endpoint->vfu_ctx, tgt_device_reset_cb); + if (ret < 0) { + SPDK_ERRLOG("vfu_ctx %p failed to setup reset callback\n", endpoint->vfu_ctx); + goto error; + } + } + + if (endpoint->ops.quiesce_device) { + vfu_setup_device_quiesce_cb(endpoint->vfu_ctx, tgt_device_quiesce_cb); + } + + ret = vfu_setup_device_nr_irqs(endpoint->vfu_ctx, VFU_DEV_INTX_IRQ, pci_dev.nr_int_irqs); + if (ret < 0) { + SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", endpoint->vfu_ctx); + goto error; + } + + ret = vfu_setup_device_nr_irqs(endpoint->vfu_ctx, VFU_DEV_MSIX_IRQ, pci_dev.nr_msix_irqs); + if (ret < 0) { + SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", endpoint->vfu_ctx); + goto error; + } + + ret = vfu_realize_ctx(endpoint->vfu_ctx); + if (ret < 0) { + SPDK_ERRLOG("vfu_ctx %p failed to realize\n", endpoint->vfu_ctx); + goto error; + } + + endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx); + assert(endpoint->pci_config_space != NULL); + init_pci_config_space(endpoint->pci_config_space, pci_dev.intr_ipin); + + assert(cap_offset != 0); + endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset); + + return 0; + +error: + if (endpoint->vfu_ctx) { + 
vfu_destroy_ctx(endpoint->vfu_ctx); + } + return ret; +} + +static int +vfu_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask) +{ + int rc; + struct spdk_cpuset negative_vfu_mask; + + if (cpumask == NULL) { + return -1; + } + + if (mask == NULL) { + spdk_cpuset_copy(cpumask, &g_tgt_core_mask); + return 0; + } + + rc = spdk_cpuset_parse(cpumask, mask); + if (rc < 0) { + SPDK_ERRLOG("invalid cpumask %s\n", mask); + return -1; + } + + spdk_cpuset_copy(&negative_vfu_mask, &g_tgt_core_mask); + spdk_cpuset_negate(&negative_vfu_mask); + spdk_cpuset_and(&negative_vfu_mask, cpumask); + + if (spdk_cpuset_count(&negative_vfu_mask) != 0) { + SPDK_ERRLOG("one of selected cpu is outside of core mask(=%s)\n", + spdk_cpuset_fmt(&g_tgt_core_mask)); + return -1; + } + + spdk_cpuset_and(cpumask, &g_tgt_core_mask); + + if (spdk_cpuset_count(cpumask) == 0) { + SPDK_ERRLOG("no cpu is selected among core mask(=%s)\n", + spdk_cpuset_fmt(&g_tgt_core_mask)); + return -1; + } + + return 0; +} + +static void +tgt_endpoint_start_thread(void *arg1) +{ + struct spdk_vfu_endpoint *endpoint = arg1; + + endpoint->accept_poller = SPDK_POLLER_REGISTER(tgt_accept_poller, endpoint, 1000); + assert(endpoint->accept_poller != NULL); +} + +static void +tgt_endpoint_thread_exit(void *arg1) +{ + struct spdk_vfu_endpoint *endpoint = arg1; + + spdk_poller_unregister(&endpoint->accept_poller); + spdk_poller_unregister(&endpoint->vfu_ctx_poller); + + /* Ensure the attached device is stopped before destroying the vfu context */ + if (endpoint->ops.detach_device) { + endpoint->ops.detach_device(endpoint); + } + + if (endpoint->vfu_ctx) { + vfu_destroy_ctx(endpoint->vfu_ctx); + } + + endpoint->ops.destruct(endpoint); + free(endpoint); + + spdk_thread_exit(spdk_get_thread()); +} + +int +spdk_vfu_create_endpoint(const char *endpoint_name, const char *cpumask_str, + const char *dev_type_name) +{ + char *basename; + char uuid[PATH_MAX] = ""; + struct spdk_cpuset cpumask = {}; + struct spdk_vfu_endpoint 
*endpoint; + struct spdk_vfu_endpoint_ops *ops; + int ret = 0; + + ret = vfu_parse_core_mask(cpumask_str, &cpumask); + if (ret) { + return ret; + } + + if (strlen(endpoint_name) >= SPDK_VFU_MAX_NAME_LEN - 1) { + return -ENAMETOOLONG; + } + + if (spdk_vfu_get_endpoint_by_name(endpoint_name)) { + SPDK_ERRLOG("%s already exist\n", endpoint_name); + return -EEXIST; + } + + /* Find supported PCI device type */ + ops = tgt_get_pci_device_ops(dev_type_name); + if (!ops) { + SPDK_ERRLOG("Request %s device type isn't registered\n", dev_type_name); + return -ENOTSUP; + } + + basename = tgt_get_base_path(); + if (snprintf(uuid, sizeof(uuid), "%s%s", basename, endpoint_name) >= (int)sizeof(uuid)) { + SPDK_ERRLOG("Resulting socket path for endpoint %s is too long: %s%s\n", + endpoint_name, basename, endpoint_name); + return -EINVAL; + } + + endpoint = calloc(1, sizeof(*endpoint)); + if (!endpoint) { + return -ENOMEM; + } + + endpoint->endpoint_ctx = ops->init(endpoint, basename, endpoint_name); + if (!endpoint->endpoint_ctx) { + free(endpoint); + return -EINVAL; + } + endpoint->ops = *ops; + snprintf(endpoint->name, SPDK_VFU_MAX_NAME_LEN, "%s", endpoint_name); + snprintf(endpoint->uuid, sizeof(uuid), "%s", uuid); + + SPDK_DEBUGLOG(vfu, "Construct endpoint %s\n", endpoint_name); + /* Endpoint realize */ + ret = tgt_endpoint_realize(endpoint); + if (ret) { + endpoint->ops.destruct(endpoint); + free(endpoint); + return ret; + } + + endpoint->thread = spdk_thread_create(endpoint_name, &cpumask); + if (!endpoint->thread) { + endpoint->ops.destruct(endpoint); + vfu_destroy_ctx(endpoint->vfu_ctx); + free(endpoint); + return -EFAULT; + } + + pthread_mutex_lock(&g_endpoint_lock); + TAILQ_INSERT_TAIL(&g_endpoint, endpoint, link); + pthread_mutex_unlock(&g_endpoint_lock); + + spdk_thread_send_msg(endpoint->thread, tgt_endpoint_start_thread, endpoint); + + return 0; +} + +int +spdk_vfu_delete_endpoint(const char *endpoint_name) +{ + struct spdk_vfu_endpoint *endpoint; + + endpoint = 
spdk_vfu_get_endpoint_by_name(endpoint_name); + if (!endpoint) { + SPDK_ERRLOG("%s doesn't exist\n", endpoint_name); + return -ENOENT; + } + + SPDK_NOTICELOG("Destruct endpoint %s\n", endpoint_name); + + pthread_mutex_lock(&g_endpoint_lock); + TAILQ_REMOVE(&g_endpoint, endpoint, link); + pthread_mutex_unlock(&g_endpoint_lock); + spdk_thread_send_msg(endpoint->thread, tgt_endpoint_thread_exit, endpoint); + + return 0; +} + +const char * +spdk_vfu_get_endpoint_id(struct spdk_vfu_endpoint *endpoint) +{ + return endpoint->uuid; +} + +const char * +spdk_vfu_get_endpoint_name(struct spdk_vfu_endpoint *endpoint) +{ + return endpoint->name; +} + +vfu_ctx_t * +spdk_vfu_get_vfu_ctx(struct spdk_vfu_endpoint *endpoint) +{ + return endpoint->vfu_ctx; +} + +void * +spdk_vfu_get_endpoint_private(struct spdk_vfu_endpoint *endpoint) +{ + return endpoint->endpoint_ctx; +} + +bool +spdk_vfu_endpoint_msix_enabled(struct spdk_vfu_endpoint *endpoint) +{ + return endpoint->msix->mxc.mxe; +} + +bool +spdk_vfu_endpoint_intx_enabled(struct spdk_vfu_endpoint *endpoint) +{ + return !endpoint->pci_config_space->hdr.cmd.id; +} + +void * +spdk_vfu_endpoint_get_pci_config(struct spdk_vfu_endpoint *endpoint) +{ + return (void *)endpoint->pci_config_space; +} + +/* + * Initialize the vfio-user target. If no socket base path has been set, + * default it to the current working directory (with a trailing '/'), + * then record every env core in g_tgt_core_mask. + * The outcome is always reported through init_cb: 0 on success, -errno on failure. + */ +void +spdk_vfu_init(spdk_vfu_init_cb init_cb) +{ + uint32_t i; + size_t len; + int rc; + + if (g_endpoint_path_dirname[0] == '\0') { + if (getcwd(g_endpoint_path_dirname, sizeof(g_endpoint_path_dirname) - 2) == NULL) { + /* Capture errno before logging can overwrite it */ + rc = -errno; + SPDK_ERRLOG("getcwd failed\n"); + /* Keep the path marked as unset and complete the callback so the caller is not left waiting */ + g_endpoint_path_dirname[0] = '\0'; + init_cb(rc); + return; + } + + len = strlen(g_endpoint_path_dirname); + if (g_endpoint_path_dirname[len - 1] != '/') { + g_endpoint_path_dirname[len] = '/'; + g_endpoint_path_dirname[len + 1] = '\0'; + } + } + + spdk_cpuset_zero(&g_tgt_core_mask); + SPDK_ENV_FOREACH_CORE(i) { + spdk_cpuset_set_cpu(&g_tgt_core_mask, i, true); + } + + init_cb(0); +} + +void * +spdk_vfu_map_one(struct spdk_vfu_endpoint *endpoint, uint64_t addr, uint64_t len, dma_sg_t *sg, + struct iovec *iov, + int prot) +{ + int ret; + + 
assert(endpoint != NULL); + assert(endpoint->vfu_ctx != NULL); + assert(sg != NULL); + assert(iov != NULL); + + ret = vfu_addr_to_sgl(endpoint->vfu_ctx, (void *)(uintptr_t)addr, len, sg, 1, prot); + if (ret < 0) { + return NULL; + } + + ret = vfu_sgl_get(endpoint->vfu_ctx, sg, iov, 1, 0); + if (ret != 0) { + return NULL; + } + + assert(iov->iov_base != NULL); + return iov->iov_base; +} + +void +spdk_vfu_unmap_sg(struct spdk_vfu_endpoint *endpoint, dma_sg_t *sg, struct iovec *iov, int iovcnt) +{ + assert(endpoint != NULL); + assert(endpoint->vfu_ctx != NULL); + assert(sg != NULL); + assert(iov != NULL); + + vfu_sgl_put(endpoint->vfu_ctx, sg, iov, iovcnt); +} + +void +spdk_vfu_fini(spdk_vfu_fini_cb fini_cb) +{ + struct spdk_vfu_endpoint *endpoint, *tmp; + struct tgt_pci_device_ops *ops, *ops_tmp; + + pthread_mutex_lock(&g_endpoint_lock); + TAILQ_FOREACH_SAFE(ops, &g_pci_device_ops, link, ops_tmp) { + TAILQ_REMOVE(&g_pci_device_ops, ops, link); + free(ops); + } + + TAILQ_FOREACH_SAFE(endpoint, &g_endpoint, link, tmp) { + TAILQ_REMOVE(&g_endpoint, endpoint, link); + spdk_thread_send_msg(endpoint->thread, tgt_endpoint_thread_exit, endpoint); + } + pthread_mutex_unlock(&g_endpoint_lock); + + fini_cb(); +} +SPDK_LOG_REGISTER_COMPONENT(vfu) diff --git a/lib/vfu_tgt/tgt_internal.h b/lib/vfu_tgt/tgt_internal.h new file mode 100644 index 000000000..f5a8b13c6 --- /dev/null +++ b/lib/vfu_tgt/tgt_internal.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ */ + +#ifndef _TGT_INTERNAL_H +#define _TGT_INTERNAL_H + +#include "spdk/vfu_target.h" + +struct spdk_vfu_endpoint { + char name[SPDK_VFU_MAX_NAME_LEN]; + char uuid[PATH_MAX]; + + struct spdk_vfu_endpoint_ops ops; + + vfu_ctx_t *vfu_ctx; + void *endpoint_ctx; + + struct spdk_poller *accept_poller; + struct spdk_poller *vfu_ctx_poller; + bool is_attached; + + struct msixcap *msix; + vfu_pci_config_space_t *pci_config_space; + + struct spdk_thread *thread; + + TAILQ_ENTRY(spdk_vfu_endpoint) link; +}; + +#endif diff --git a/lib/vfu_tgt/tgt_rpc.c b/lib/vfu_tgt/tgt_rpc.c new file mode 100644 index 000000000..ce4a8f714 --- /dev/null +++ b/lib/vfu_tgt/tgt_rpc.c @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. + */ + +#include "spdk/bdev.h" +#include "spdk/log.h" +#include "spdk/rpc.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/thread.h" + +#include "tgt_internal.h" + +struct rpc_set_vfu_path { + char *path; +}; + +static const struct spdk_json_object_decoder rpc_set_vfu_path_decode[] = { + {"path", offsetof(struct rpc_set_vfu_path, path), spdk_json_decode_string } +}; + +static void +free_rpc_set_vfu_path(struct rpc_set_vfu_path *req) +{ + free(req->path); +} + +static void +rpc_vfu_set_base_path(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_set_vfu_path req = {0}; + int rc; + + if (spdk_json_decode_object(params, rpc_set_vfu_path_decode, + SPDK_COUNTOF(rpc_set_vfu_path_decode), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vfu_set_socket_path(req.path); + if (rc < 0) { + goto invalid; + } + free_rpc_set_vfu_path(&req); + + spdk_jsonrpc_send_bool_response(request, true); + return; + +invalid: + free_rpc_set_vfu_path(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} 
+SPDK_RPC_REGISTER("vfu_tgt_set_base_path", rpc_vfu_set_base_path, + SPDK_RPC_RUNTIME) diff --git a/mk/spdk.lib_deps.mk b/mk/spdk.lib_deps.mk index 4020f406c..4b7230511 100644 --- a/mk/spdk.lib_deps.mk +++ b/mk/spdk.lib_deps.mk @@ -27,6 +27,7 @@ DEPDIRS-dma := log DEPDIRS-trace_parser := log ifeq ($(CONFIG_VFIO_USER),y) DEPDIRS-vfio_user := log +DEPDIRS-vfu_tgt := log util thread $(JSON_LIBS) endif DEPDIRS-conf := log util @@ -158,3 +159,4 @@ DEPDIRS-event_iscsi := init iscsi event_scheduler event_scsi event_sock DEPDIRS-event_vhost_blk := init vhost DEPDIRS-event_vhost_scsi := init vhost event_scheduler event_scsi DEPDIRS-event_sock := init sock +DEPDIRS-event_vfu_tgt := init vfu_tgt diff --git a/module/event/subsystems/Makefile b/module/event/subsystems/Makefile index 306c9233f..6f456994e 100644 --- a/module/event/subsystems/Makefile +++ b/module/event/subsystems/Makefile @@ -13,6 +13,7 @@ DIRS-y += nbd endif DIRS-$(CONFIG_VHOST) += vhost_blk vhost_scsi +DIRS-$(CONFIG_VFIO_USER) += vfu_tgt # These dependencies are not based specifically on symbols, but rather # the subsystem dependency tree defined within the event subsystem C files diff --git a/module/event/subsystems/vfu_tgt/Makefile b/module/event/subsystems/vfu_tgt/Makefile new file mode 100644 index 000000000..7dfa4885a --- /dev/null +++ b/module/event/subsystems/vfu_tgt/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) Intel Corporation. +# All rights reserved. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +SO_VER := 1 +SO_MINOR := 0 + +C_SRCS = vfu_tgt.c +LIBNAME = event_vfu_tgt + +SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/module/event/subsystems/vfu_tgt/vfu_tgt.c b/module/event/subsystems/vfu_tgt/vfu_tgt.c new file mode 100644 index 000000000..789d7b71f --- /dev/null +++ b/module/event/subsystems/vfu_tgt/vfu_tgt.c @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. + */ + +#include "spdk/stdinc.h" +#include "spdk/vfu_target.h" + +#include "spdk_internal/init.h" + +static void +vfu_subsystem_init_done(int rc) +{ + spdk_subsystem_init_next(rc); +} + +static void +vfu_target_subsystem_init(void) +{ + spdk_vfu_init(vfu_subsystem_init_done); +} + +static void +vfu_target_subsystem_fini_done(void) +{ + spdk_subsystem_fini_next(); +} + +static void +vfu_target_subsystem_fini(void) +{ + spdk_vfu_fini(vfu_target_subsystem_fini_done); +} + +static struct spdk_subsystem g_spdk_subsystem_vfu_target = { + .name = "vfio_user_target", + .init = vfu_target_subsystem_init, + .fini = vfu_target_subsystem_fini, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_vfu_target); diff --git a/python/spdk/rpc/__init__.py b/python/spdk/rpc/__init__.py index 3a3d9f6f7..668598ed5 100644 --- a/python/spdk/rpc/__init__.py +++ b/python/spdk/rpc/__init__.py @@ -25,6 +25,7 @@ from . import trace from . import vhost from . import vmd from . import sock +from . import vfio_user from . import client as rpc_client diff --git a/python/spdk/rpc/vfio_user.py b/python/spdk/rpc/vfio_user.py new file mode 100644 index 000000000..638fd5237 --- /dev/null +++ b/python/spdk/rpc/vfio_user.py @@ -0,0 +1,11 @@ +def vfu_tgt_set_base_path(client, path): + """Set socket base path. 
+ + Args: + path: base path + """ + params = { + 'path': path + } + + return client.call('vfu_tgt_set_base_path', params) diff --git a/scripts/rpc.py b/scripts/rpc.py index 49467fdd0..38ad6891a 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -2616,6 +2616,14 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse help='How often the hotplug is processed for insert and remove events', type=int) p.set_defaults(func=bdev_virtio_blk_set_hotplug) + # vfio-user target + def vfu_tgt_set_base_path(args): + rpc.vfio_user.vfu_tgt_set_base_path(args.client, path=args.path) + + p = subparsers.add_parser('vfu_tgt_set_base_path', help='Set socket base path.') + p.add_argument('path', help='socket base path') + p.set_defaults(func=vfu_tgt_set_base_path) + # accel_fw def accel_get_opc_assignments(args): print_dict(rpc.accel.accel_get_opc_assignments(args.client)) diff --git a/test/cpp_headers/Makefile b/test/cpp_headers/Makefile index 0d938d9ab..f4f628e6c 100644 --- a/test/cpp_headers/Makefile +++ b/test/cpp_headers/Makefile @@ -11,6 +11,7 @@ HEADERS := $(wildcard $(SPDK_ROOT_DIR)/include/spdk/*.h) # On Linux, queue_extras.h is pulled in with queue.h, # on FreeBSD, we want to ignore queue_extras.h entirely. HEADERS := $(filter-out $(SPDK_ROOT_DIR)/include/spdk/queue_extras.h,$(HEADERS)) +HEADERS := $(filter-out $(SPDK_ROOT_DIR)/include/spdk/vfu_target.h,$(HEADERS)) CXX_SRCS := $(patsubst %.h,%.cpp,$(notdir $(HEADERS))) install : all