From 23ef63882cb62bd2aa2c82498591276685ae1330 Mon Sep 17 00:00:00 2001
From: Changpeng Liu
Date: Sat, 7 May 2022 13:44:36 +0800
Subject: [PATCH] module/vfu_device: add virtio-blk emulation

Here we use the vfu-tgt library to emulate a virtio-blk device, as the
first use case of that library.

Usage example with QEMU:
1. build/bin/spdk_tgt
2. scripts/rpc.py bdev_malloc_create -b malloc0 $((512)) 512
3. scripts/rpc.py vfu_virtio_create_blk_endpoint vfu.0 --bdev-name malloc0 \
   --cpumask=0x1 --num-queues=2 \
   --qsize=256 --packed-ring
4. Start QEMU with '-device vfio-user-pci,socket=/spdk/vfu.0'

Change-Id: I45e45360c669584583b0b8a3f83250ab6c48efec
Signed-off-by: Changpeng Liu
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12315
Tested-by: SPDK CI Jenkins
Reviewed-by: Konrad Sztyber
Reviewed-by: Ben Walker
Community-CI: Mellanox Build Bot
---
 doc/jsonrpc.md                          |   79 +
 mk/spdk.lib_deps.mk                     |    6 +
 mk/spdk.modules.mk                      |    5 +
 module/Makefile                         |    5 +
 module/vfu_device/Makefile              |   17 +
 module/vfu_device/vfu_virtio.c          | 1777 +++++++++++++++++++++++
 module/vfu_device/vfu_virtio_blk.c      |  615 ++++++++
 module/vfu_device/vfu_virtio_internal.h |  403 +++++
 module/vfu_device/vfu_virtio_rpc.c      |  126 ++
 python/spdk/rpc/vfio_user.py            |   40 +
 scripts/rpc.py                          |   25 +
 11 files changed, 3098 insertions(+)
 create mode 100644 module/vfu_device/Makefile
 create mode 100644 module/vfu_device/vfu_virtio.c
 create mode 100644 module/vfu_device/vfu_virtio_blk.c
 create mode 100644 module/vfu_device/vfu_virtio_internal.h
 create mode 100644 module/vfu_device/vfu_virtio_rpc.c

diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md
index 8170e9e45..f4f6f816f 100644
--- a/doc/jsonrpc.md
+++ b/doc/jsonrpc.md
@@ -8010,6 +8010,85 @@ Example response:
 }
 ~~~
 
+### vfu_virtio_delete_endpoint {#rpc_vfu_virtio_delete_endpoint}
+
+Delete a vfio-user PCI device via its endpoint name.
+
+#### Parameters
+
+Name                    | Optional | Type        | Description
+----------------------- | -------- | ----------- | -----------
+name                    | Required | string      | Endpoint name
+
+#### Example
+
+Example request:
+
+~~~json
+{
+  "params": {
+    "name": "vfu.0"
+  },
+  "jsonrpc": "2.0",
+  "method": "vfu_virtio_delete_endpoint",
+  "id": 1
+}
+~~~
+
+Example response:
+
+~~~json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "result": true
+}
+~~~
+
+### vfu_virtio_create_blk_endpoint {#rpc_vfu_virtio_create_blk_endpoint}
+
+Create a vfio-user virtio-blk PCI endpoint.
+
+#### Parameters
+
+Name                    | Optional | Type        | Description
+----------------------- | -------- | ----------- | -----------
+name                    | Required | string      | Endpoint name
+bdev_name               | Required | string      | Block device name
+cpumask                 | Optional | string      | CPU mask
+num_queues              | Optional | number      | Number of queues
+qsize                   | Optional | number      | Queue size
+packed_ring             | Optional | boolean     | Enable packed ring
+
+#### Example
+
+Example request:
+
+~~~json
+{
+  "params": {
+    "name": "vfu.0",
+    "bdev_name": "Malloc0",
+    "cpumask": "0x2",
+    "num_queues": 4,
+    "qsize": 256
+  },
+  "jsonrpc": "2.0",
+  "method": "vfu_virtio_create_blk_endpoint",
+  "id": 1
+}
+~~~
+
+Example response:
+
+~~~json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "result": true
+}
+~~~
+
 ## Vhost Target {#jsonrpc_components_vhost_tgt}
 
 The following common preconditions need to be met in all target types.
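For quick reference, the same two calls can be driven from the command line through `scripts/rpc.py`, which this patch extends with matching subcommands. A minimal sketch, assuming the subcommand spellings mirror the RPC method names (the create invocation and its flags follow the commit-message example; the values mirror the JSON example above):

~~~
scripts/rpc.py vfu_virtio_create_blk_endpoint vfu.0 --bdev-name Malloc0 \
        --cpumask=0x2 --num-queues=4 --qsize=256
scripts/rpc.py vfu_virtio_delete_endpoint vfu.0
~~~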
diff --git a/mk/spdk.lib_deps.mk b/mk/spdk.lib_deps.mk index 4b7230511..237f0ac1c 100644 --- a/mk/spdk.lib_deps.mk +++ b/mk/spdk.lib_deps.mk @@ -160,3 +160,9 @@ DEPDIRS-event_vhost_blk := init vhost DEPDIRS-event_vhost_scsi := init vhost event_scheduler event_scsi DEPDIRS-event_sock := init sock DEPDIRS-event_vfu_tgt := init vfu_tgt + +# module/vfu_device + +ifeq ($(CONFIG_VFIO_USER),y) +DEPDIRS-vfu_device := $(BDEV_DEPS_THREAD) vfu_tgt +endif diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk index 92f1f0382..01f8f5cd4 100644 --- a/mk/spdk.modules.mk +++ b/mk/spdk.modules.mk @@ -107,7 +107,12 @@ ifeq (y,$(DPDK_POWER)) SCHEDULER_MODULES_LIST += env_dpdk scheduler_dpdk_governor scheduler_gscheduler endif +ifeq ($(CONFIG_VFIO_USER),y) +VFU_DEVICE_MODULES_LIST = vfu_device +endif + EVENT_BDEV_SUBSYSTEM = event_bdev event_accel event_vmd event_sock ALL_MODULES_LIST = $(BLOCKDEV_MODULES_LIST) $(ACCEL_MODULES_LIST) $(SCHEDULER_MODULES_LIST) $(SOCK_MODULES_LIST) +ALL_MODULES_LIST += $(VFU_DEVICE_MODULES_LIST) SYS_LIBS += $(BLOCKDEV_MODULES_PRIVATE_LIBS) diff --git a/module/Makefile b/module/Makefile index d83fb79db..9b2191b67 100644 --- a/module/Makefile +++ b/module/Makefile @@ -12,6 +12,10 @@ ifeq ($(SPDK_ROOT_DIR)/lib/env_dpdk,$(CONFIG_ENV)) DIRS-y += env_dpdk endif +ifeq ($(CONFIG_VFIO_USER), y) +DIRS-y += vfu_device +endif + DEPDIRS-blob := DEPDIRS-accel := DEPDIRS-env_dpdk := @@ -20,6 +24,7 @@ DEPDIRS-scheduler := DEPDIRS-bdev := blob DEPDIRS-blobfs := blob DEPDIRS-event := bdev blob +DEPDIRS-vfu_device := .PHONY: all clean $(DIRS-y) diff --git a/module/vfu_device/Makefile b/module/vfu_device/Makefile new file mode 100644 index 000000000..48fab3c99 --- /dev/null +++ b/module/vfu_device/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) Intel Corporation. +# All rights reserved. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +SO_VER := 1 +SO_MINOR := 0 + +C_SRCS = vfu_virtio.c vfu_virtio_blk.c vfu_virtio_rpc.c +LIBNAME = vfu_device + +SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/module/vfu_device/vfu_virtio.c b/module/vfu_device/vfu_virtio.c new file mode 100644 index 000000000..904332fbe --- /dev/null +++ b/module/vfu_device/vfu_virtio.c @@ -0,0 +1,1777 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ */ + +/* + * virtio over vfio-user common library + */ +#include "spdk/env.h" +#include "spdk/bdev.h" +#include "spdk/bdev_module.h" +#include "spdk/stdinc.h" +#include "spdk/assert.h" +#include "spdk/barrier.h" +#include "spdk/thread.h" +#include "spdk/memory.h" +#include "spdk/util.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/likely.h" + +#include "vfu_virtio_internal.h" + +static int vfu_virtio_dev_start(struct vfu_virtio_dev *dev); +static int vfu_virtio_dev_stop(struct vfu_virtio_dev *dev); + +static inline void +vfu_virtio_unmap_q(struct vfu_virtio_dev *dev, struct q_mapping *mapping) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + + if (mapping->addr != NULL) { + spdk_vfu_unmap_sg(virtio_endpoint->endpoint, mapping->sg, + &mapping->iov, 1); + mapping->addr = NULL; + mapping->len = 0; + } +} + +static inline int +vfu_virtio_map_q(struct vfu_virtio_dev *dev, struct q_mapping *mapping, uint64_t phys_addr, + uint64_t len) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + void *addr; + + if (!mapping->addr && len && phys_addr) { + addr = spdk_vfu_map_one(virtio_endpoint->endpoint, phys_addr, len, + mapping->sg, &mapping->iov, PROT_READ | PROT_WRITE); + if (addr == NULL) { + return -EINVAL; + } + mapping->phys_addr = phys_addr; + mapping->len = len; + mapping->addr = addr; + } + + return 0; +} + +static int +virtio_dev_map_vq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + int ret; + uint64_t phys_addr, len; + + if (!vq->enabled || (vq->q_state == VFU_VQ_ACTIVE)) { + return 0; + } + + SPDK_DEBUGLOG(vfu_virtio, "%s: try to map vq %u\n", dev->name, vq->id); + + len = virtio_queue_desc_size(dev, vq); + phys_addr = ((((uint64_t)vq->desc_hi) << 32) | vq->desc_lo); + ret = vfu_virtio_map_q(dev, &vq->desc, phys_addr, len); + if (ret) { + SPDK_DEBUGLOG(vfu_virtio, "Error to map descs\n"); + return ret; + } + + len = virtio_queue_avail_size(dev, vq); + phys_addr = ((((uint64_t)vq->avail_hi) << 32) | vq->avail_lo); + ret = vfu_virtio_map_q(dev, &vq->avail, phys_addr, len); + if (ret) { + vfu_virtio_unmap_q(dev, &vq->desc); + SPDK_DEBUGLOG(vfu_virtio, "Error to map available ring\n"); + return ret; + } + + len = virtio_queue_used_size(dev, vq); + phys_addr = ((((uint64_t)vq->used_hi) << 32) | vq->used_lo); + ret = vfu_virtio_map_q(dev, &vq->used, phys_addr, len); + if (ret) { + vfu_virtio_unmap_q(dev, &vq->desc); + vfu_virtio_unmap_q(dev, &vq->avail); + SPDK_DEBUGLOG(vfu_virtio, "Error to map used ring\n"); + return ret; + } + + /* We're running with polling mode */ + if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) { + vq->used.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE; + } else { + vq->used.used->flags = VRING_USED_F_NO_NOTIFY; + } + + SPDK_DEBUGLOG(vfu_virtio, "%s: map vq %u successfully\n", dev->name, vq->id); + vq->q_state = VFU_VQ_ACTIVE; + + return 0; +} + +static void +virtio_dev_unmap_vq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + SPDK_DEBUGLOG(vfu_virtio, "%s: unmap vq %u\n", dev->name, vq->id); + vq->q_state = VFU_VQ_INACTIVE; + + vfu_virtio_unmap_q(dev, &vq->desc); + vfu_virtio_unmap_q(dev, &vq->avail); + vfu_virtio_unmap_q(dev, &vq->used); +} + +static bool +vfu_virtio_vq_should_unmap(struct vfu_virtio_vq *vq, void *map_start, void *map_end) +{ + /* always do unmap when stopping the device */ + if (!map_start || !map_end) { + return true; + } + + if (vq->desc.addr >= map_start && vq->desc.addr < map_end) { + return true; + } + + if (vq->avail.addr >= map_start && 
vq->avail.addr < map_end) { + return true; + } + + if (vq->used.addr >= map_start && vq->used.addr < map_end) { + return true; + } + + return false; +} + +static void +vfu_virtio_dev_unmap_vqs(struct vfu_virtio_dev *dev, void *map_start, void *map_end) +{ + uint32_t i; + struct vfu_virtio_vq *vq; + + for (i = 0; i < dev->num_queues; i++) { + vq = &dev->vqs[i]; + if (!vq->enabled) { + continue; + } + + if (!vfu_virtio_vq_should_unmap(vq, map_start, map_end)) { + continue; + } + virtio_dev_unmap_vq(dev, vq); + } +} + +/* This function is used to notify VM that the device + * configuration space has been changed. + */ +void +vfu_virtio_notify_config(struct vfu_virtio_endpoint *virtio_endpoint) +{ + struct spdk_vfu_endpoint *endpoint = virtio_endpoint->endpoint; + + if (virtio_endpoint->dev == NULL) { + return; + } + + virtio_endpoint->dev->cfg.isr = 1; + virtio_endpoint->dev->cfg.config_generation++; + + vfu_irq_trigger(spdk_vfu_get_vfu_ctx(endpoint), virtio_endpoint->dev->cfg.msix_config); +} + +static void +vfu_virtio_dev_reset(struct vfu_virtio_dev *dev) +{ + uint32_t i; + struct vfu_virtio_vq *vq; + + SPDK_DEBUGLOG(vfu_virtio, "device %s resetting\n", dev->name); + + for (i = 0; i < dev->num_queues; i++) { + vq = &dev->vqs[i]; + + vq->q_state = VFU_VQ_CREATED; + vq->vector = 0; + vq->enabled = false; + vq->last_avail_idx = 0; + vq->last_used_idx = 0; + + vq->packed.packed_ring = false; + vq->packed.avail_phase = 0; + vq->packed.used_phase = 0; + } + + memset(&dev->cfg, 0, sizeof(struct virtio_pci_cfg)); +} + +static int +virtio_dev_set_status(struct vfu_virtio_dev *dev, uint8_t status) +{ + int ret = 0; + + SPDK_DEBUGLOG(vfu_virtio, "device current status %x, set status %x\n", dev->cfg.device_status, + status); + + if (!(virtio_dev_is_started(dev))) { + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + ret = vfu_virtio_dev_start(dev); + } + } else { + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { + ret = vfu_virtio_dev_stop(dev); + } + } + + if (ret) { + SPDK_ERRLOG("Failed to start/stop device\n"); + return ret; + } + + dev->cfg.device_status = status; + + if (status == 0) { + vfu_virtio_dev_reset(dev); + } + + return 0; +} + +static int +virtio_dev_set_features(struct vfu_virtio_dev *dev, uint64_t features) +{ + if (dev->cfg.device_status & VIRTIO_CONFIG_S_FEATURES_OK) { + SPDK_ERRLOG("Feature negotiation has finished\n"); + return -EINVAL; + } + + if (features & ~dev->host_features) { + SPDK_ERRLOG("Host features 0x%"PRIx64", guest features 0x%"PRIx64"\n", + dev->host_features, features); + return -ENOTSUP; + } + + SPDK_DEBUGLOG(vfu_virtio, "%s: negotiated features 0x%"PRIx64"\n", dev->name, + features); + dev->cfg.guest_features = features; + + return 0; +} + +static int +virtio_dev_enable_vq(struct vfu_virtio_dev *dev, uint16_t qid) +{ + struct vfu_virtio_vq *vq; + + SPDK_DEBUGLOG(vfu_virtio, "%s: enable vq %u\n", dev->name, qid); + + vq = &dev->vqs[qid]; + if (vq->enabled) { + SPDK_ERRLOG("Queue %u is enabled\n", qid); + return -EINVAL; + } + vq->enabled = true; + + if (virtio_dev_map_vq(dev, vq)) { + SPDK_ERRLOG("Queue %u failed to map\n", qid); + return 0; + } + + vq->avail.avail->idx = 0; + vq->last_avail_idx = 0; + vq->used.used->idx = 0; + vq->last_used_idx = 0; + + if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) { + SPDK_DEBUGLOG(vfu_virtio, "%s: vq %u PACKED RING ENABLED\n", dev->name, qid); + vq->packed.packed_ring = true; + vq->packed.avail_phase = true; + vq->packed.used_phase = true; + } + + return 0; +} + +static int +virtio_dev_disable_vq(struct vfu_virtio_dev *dev, 
uint16_t qid) +{ + struct vfu_virtio_vq *vq; + + SPDK_DEBUGLOG(vfu_virtio, "%s: disable vq %u\n", dev->name, qid); + + vq = &dev->vqs[qid]; + if (!vq->enabled) { + SPDK_NOTICELOG("Queue %u isn't enabled\n", qid); + return 0; + } + + virtio_dev_unmap_vq(dev, vq); + + vq->q_state = VFU_VQ_CREATED; + vq->vector = 0; + vq->enabled = false; + vq->last_avail_idx = 0; + vq->last_used_idx = 0; + vq->packed.packed_ring = false; + vq->packed.avail_phase = 0; + vq->packed.used_phase = 0; + + return 0; +} + +static int +virtio_dev_split_get_avail_reqs(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq, + uint16_t *reqs, uint16_t max_reqs) +{ + uint16_t count, i, avail_idx, last_idx; + + last_idx = vq->last_avail_idx; + avail_idx = vq->avail.avail->idx; + + spdk_smp_rmb(); + + count = avail_idx - last_idx; + if (count == 0) { + return 0; + } + + count = spdk_min(count, max_reqs); + vq->last_avail_idx += count; + + for (i = 0; i < count; i++) { + reqs[i] = vq->avail.avail->ring[(last_idx + i) & (vq->qsize - 1)]; + } + + SPDK_DEBUGLOG(vfu_virtio, + "AVAIL: vq %u last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n", + vq->id, last_idx, avail_idx, count); + + return count; +} + +static int +virtio_vring_split_desc_get_next(struct vring_desc **desc, + struct vring_desc *desc_table, + uint32_t desc_table_size) +{ + struct vring_desc *old_desc = *desc; + uint16_t next_idx; + + if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) { + *desc = NULL; + return 0; + } + + next_idx = old_desc->next; + if (spdk_unlikely(next_idx >= desc_table_size)) { + *desc = NULL; + return -1; + } + + *desc = &desc_table[next_idx]; + return 0; +} + +static inline void * +virtio_vring_desc_to_iov(struct vfu_virtio_dev *dev, struct vring_desc *desc, + dma_sg_t *sg, struct iovec *iov) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + + return spdk_vfu_map_one(virtio_endpoint->endpoint, desc->addr, desc->len, + sg, iov, PROT_READ | PROT_WRITE); +} + +static int +virtio_split_vring_get_desc(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq, uint16_t desc_idx, + struct vring_desc **desc, struct vring_desc **desc_table, + uint32_t *desc_table_size, + dma_sg_t *sg, struct iovec *iov) +{ + *desc = &vq->desc.desc[desc_idx]; + + if (virtio_vring_split_desc_is_indirect(*desc)) { + *desc_table_size = (*desc)->len / sizeof(struct vring_desc); + *desc_table = virtio_vring_desc_to_iov(dev, *desc, sg, iov); + *desc = *desc_table; + if (*desc == NULL) { + return -EINVAL; + } + return 0; + } + + *desc_table = vq->desc.desc; + *desc_table_size = vq->qsize; + + return 0; +} + +static inline dma_sg_t * +virtio_req_to_sg_t(struct vfu_virtio_req *req, uint32_t iovcnt) +{ + return (dma_sg_t *)(req->sg + iovcnt * dma_sg_size()); +} + +static inline struct vfu_virtio_req * +vfu_virtio_dev_get_req(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq) +{ + struct vfu_virtio_req *req; + + req = STAILQ_FIRST(&vq->free_reqs); + if (req == NULL) { + return NULL; + } + STAILQ_REMOVE_HEAD(&vq->free_reqs, link); + + req->iovcnt = 0; + req->used_len = 0; + req->payload_size = 0; + req->req_idx = 0; + req->buffer_id = 0; + req->num_descs = 0; + + return req; +} + +void +vfu_virtio_dev_put_req(struct vfu_virtio_req *req) +{ + struct vfu_virtio_dev *dev = req->dev; + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + vfu_ctx_t *vfu_ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint); + + if (req->indirect_iov->iov_base) { + vfu_sgl_put(vfu_ctx, req->indirect_sg, req->indirect_iov, 1); + 
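/* The sgl was just put back; clear the cached indirect iov so a + * recycled request cannot release a stale mapping twice. + */ + 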
req->indirect_iov->iov_base = NULL; + req->indirect_iov->iov_len = 0; + } + + if (req->iovcnt) { + vfu_sgl_put(vfu_ctx, virtio_req_to_sg_t(req, 0), req->iovs, req->iovcnt); + req->iovcnt = 0; + } + + STAILQ_INSERT_HEAD(&req->vq->free_reqs, req, link); +} + +void +vfu_virtio_finish_req(struct vfu_virtio_req *req) +{ + struct vfu_virtio_dev *dev = req->dev; + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + + assert(virtio_endpoint->io_outstanding); + virtio_endpoint->io_outstanding--; + + if (!virtio_guest_has_feature(req->dev, VIRTIO_F_RING_PACKED)) { + virtio_vq_used_ring_split_enqueue(req->vq, req->req_idx, req->used_len); + } else { + virtio_vq_used_ring_packed_enqueue(req->vq, req->buffer_id, req->num_descs, req->used_len); + } + + vfu_virtio_dev_put_req(req); +} + +static inline void +vfu_virtio_dev_free_reqs(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_dev *dev) +{ + struct vfu_virtio_req *req; + struct vfu_virtio_vq *vq; + uint32_t i; + + for (i = 0; i < dev->num_queues; i++) { + vq = &dev->vqs[i]; + while (!STAILQ_EMPTY(&vq->free_reqs)) { + req = STAILQ_FIRST(&vq->free_reqs); + STAILQ_REMOVE_HEAD(&vq->free_reqs, link); + vfu_virtio_vq_free_req(virtio_endpoint, vq, req); + } + } +} + +static int +virtio_dev_split_iovs_setup(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq, + uint16_t desc_idx, struct vfu_virtio_req *req) +{ + struct vring_desc *desc, *desc_table; + uint32_t desc_table_size, len = 0; + uint32_t desc_handled_cnt = 0; + int rc; + + rc = virtio_split_vring_get_desc(dev, vq, desc_idx, &desc, + &desc_table, &desc_table_size, + req->indirect_sg, req->indirect_iov); + if (spdk_unlikely(rc)) { + SPDK_ERRLOG("Invalid descriptor at index %"PRIu16".\n", desc_idx); + return rc; + } + + assert(req->iovcnt == 0); + + while (true) { + if (spdk_unlikely(!virtio_vring_desc_to_iov(dev, desc, virtio_req_to_sg_t(req, req->iovcnt), + &req->iovs[req->iovcnt]))) { + return -EINVAL; + } + req->desc_writeable[req->iovcnt] = false; + if (virtio_vring_split_desc_is_wr(desc)) { + req->desc_writeable[req->iovcnt] = true; + } + + req->iovcnt++; + len += desc->len; + + rc = virtio_vring_split_desc_get_next(&desc, desc_table, desc_table_size); + if (spdk_unlikely(rc)) { + return rc; + } else if (desc == NULL) { + break; + } + + desc_handled_cnt++; + if (spdk_unlikely(desc_handled_cnt > desc_table_size)) { + return -EINVAL; + } + } + + req->payload_size = len; + + return 0; +} + +void +virtio_vq_used_ring_split_enqueue(struct vfu_virtio_vq *vq, uint16_t req_idx, uint32_t used_len) +{ + uint16_t last_idx = vq->last_used_idx & (vq->qsize - 1); + + SPDK_DEBUGLOG(vfu_virtio, + "Queue %u - USED RING: last_idx=%"PRIu16" req_idx=%"PRIu16" used_len=%"PRIu32"\n", + vq->id, last_idx, req_idx, used_len); + + vq->used.used->ring[last_idx].id = req_idx; + vq->used.used->ring[last_idx].len = used_len; + vq->last_used_idx++; + + spdk_smp_wmb(); + + *(volatile uint16_t *)&vq->used.used->idx = vq->last_used_idx; + + vq->used_req_cnt++; +} + +void +virtio_vq_used_ring_packed_enqueue(struct vfu_virtio_vq *vq, uint16_t buffer_id, uint32_t num_descs, + uint32_t used_len) +{ + struct vring_packed_desc *desc = &vq->desc.desc_packed[vq->last_used_idx]; + + SPDK_DEBUGLOG(vfu_virtio, + "Queue %u - USED RING: buffer_id=%"PRIu16" num_descs=%u used_len=%"PRIu32"\n", + vq->id, buffer_id, num_descs, used_len); + + if (spdk_unlikely(virtio_vring_packed_is_used(desc, vq->packed.used_phase))) { + SPDK_ERRLOG("descriptor has been used before\n"); + return; + } + + /* In used desc addr is 
unused and len specifies the buffer length + * that has been written to by the device. + */ + desc->addr = 0; + desc->len = used_len; + + /* This bit specifies whether any data has been written by the device */ + if (used_len != 0) { + desc->flags |= VRING_DESC_F_WRITE; + } + + /* Buffer ID is included in the last descriptor in the list. + * The driver needs to keep track of the size of the list corresponding + * to each buffer ID. + */ + desc->id = buffer_id; + + /* A device MUST NOT make the descriptor used before buffer_id is + * written to the descriptor. + */ + spdk_smp_wmb(); + + /* To mark a desc as used, the device sets the F_USED bit in flags to match + * the internal Device ring wrap counter. It also sets the F_AVAIL bit to + * match the same value. + */ + if (vq->packed.used_phase) { + desc->flags |= (1 << VRING_PACKED_DESC_F_AVAIL); + desc->flags |= (1 << VRING_PACKED_DESC_F_USED); + } else { + desc->flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL); + desc->flags &= ~(1 << VRING_PACKED_DESC_F_USED); + } + + vq->last_used_idx += num_descs; + if (vq->last_used_idx >= vq->qsize) { + vq->last_used_idx -= vq->qsize; + vq->packed.used_phase = !vq->packed.used_phase; + } + + vq->used_req_cnt++; +} + +static int +vfu_virtio_vq_post_irq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + vfu_ctx_t *vfu_ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint); + + vq->used_req_cnt = 0; + + if (spdk_vfu_endpoint_msix_enabled(virtio_endpoint->endpoint)) { + SPDK_DEBUGLOG(vfu_virtio, "%s: Queue %u post MSIX IV %u\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + vq->id, vq->vector); + return vfu_irq_trigger(vfu_ctx, vq->vector); + } else { + if (!spdk_vfu_endpoint_intx_enabled(virtio_endpoint->endpoint)) { + SPDK_DEBUGLOG(vfu_virtio, "%s: IRQ disabled\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint)); + return 0; + } + + SPDK_DEBUGLOG(vfu_virtio, "%s: Queue %u post ISR\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), vq->id); + dev->cfg.isr = 1; + return vfu_irq_trigger(vfu_ctx, 0); + } +} + +void +vfu_virtio_vq_flush_irq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + uint32_t delay_us; + + if (vq->used_req_cnt == 0) { + return; + } + + /* No need to notify client */ + if (virtio_queue_event_is_suppressed(dev, vq)) { + return; + } + + /* Interrupt coalescing disabled */ + if (!virtio_endpoint->coalescing_delay_us) { + vfu_virtio_vq_post_irq(dev, vq); + return; + } + + /* No need for event right now */ + if (spdk_get_ticks() < vq->next_event_time) { + return; + } + + vfu_virtio_vq_post_irq(dev, vq); + + delay_us = virtio_endpoint->coalescing_delay_us; + vq->next_event_time = spdk_get_ticks() + delay_us * spdk_get_ticks_hz() / (1000000ULL); +} + +int +vfu_virito_dev_process_split_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + struct vfu_virtio_req *req; + uint16_t reqs_idx[VIRTIO_DEV_VRING_MAX_REQS]; + uint16_t reqs_cnt, i; + int ret; + + reqs_cnt = virtio_dev_split_get_avail_reqs(dev, vq, reqs_idx, VIRTIO_DEV_VRING_MAX_REQS); + if (!reqs_cnt) { + return 0; + } + + SPDK_DEBUGLOG(vfu_virtio, "%s: get %u descriptors\n", dev->name, reqs_cnt); + + for (i = 0; i < reqs_cnt; i++) { + req = vfu_virtio_dev_get_req(virtio_endpoint, vq); + if (spdk_unlikely(!req)) { + SPDK_ERRLOG("Error to get request\n"); + /* TODO: address the error case 
*/ + return -EIO; + } + + req->req_idx = reqs_idx[i]; + ret = virtio_dev_split_iovs_setup(dev, vq, req->req_idx, req); + if (spdk_unlikely(ret)) { + /* let the device to response this error */ + SPDK_ERRLOG("Split vring setup failed with index %u\n", i); + } + + assert(virtio_endpoint->virtio_ops.exec_request); + virtio_endpoint->io_outstanding++; + virtio_endpoint->virtio_ops.exec_request(virtio_endpoint, vq, req); + } + + return i; +} + +struct vfu_virtio_req * +virito_dev_split_ring_get_next_avail_req(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + struct vfu_virtio_req *req; + uint16_t reqs_idx[VIRTIO_DEV_VRING_MAX_REQS]; + uint16_t reqs_cnt; + int ret; + + reqs_cnt = virtio_dev_split_get_avail_reqs(dev, vq, reqs_idx, 1); + if (!reqs_cnt) { + return NULL; + } + assert(reqs_cnt == 1); + + SPDK_DEBUGLOG(vfu_virtio, "%s: get 1 descriptors\n", dev->name); + + req = vfu_virtio_dev_get_req(virtio_endpoint, vq); + if (!req) { + SPDK_ERRLOG("Error to get request\n"); + return NULL; + } + + req->req_idx = reqs_idx[0]; + ret = virtio_dev_split_iovs_setup(dev, vq, req->req_idx, req); + if (ret) { + SPDK_ERRLOG("Split vring setup failed\n"); + vfu_virtio_dev_put_req(req); + return NULL; + } + + return req; +} + +static inline void * +virtio_vring_packed_desc_to_iov(struct vfu_virtio_dev *dev, struct vring_packed_desc *desc, + dma_sg_t *sg, struct iovec *iov) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + + return spdk_vfu_map_one(virtio_endpoint->endpoint, desc->addr, desc->len, + sg, iov, PROT_READ | PROT_WRITE); +} + +static int +virtio_dev_packed_iovs_setup(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq, + uint16_t last_avail_idx, + struct vring_packed_desc *current_desc, struct vfu_virtio_req *req) +{ + struct vring_packed_desc *desc, *desc_table = NULL; + uint16_t new_idx, num_descs, desc_table_size = 0; + uint32_t len = 0; + + SPDK_DEBUGLOG(vfu_virtio, "%s: last avail idx %u, req %p\n", dev->name, last_avail_idx, req); + + desc = NULL; + num_descs = 1; + if (virtio_vring_packed_desc_is_indirect(current_desc)) { + req->buffer_id = current_desc->id; + desc_table = virtio_vring_packed_desc_to_iov(dev, current_desc, req->indirect_sg, + req->indirect_iov); + if (spdk_unlikely(desc_table == NULL)) { + SPDK_ERRLOG("Map Indirect Desc to IOV failed\n"); + return -EINVAL; + } + desc_table_size = current_desc->len / sizeof(struct vring_packed_desc); + desc = desc_table; + SPDK_DEBUGLOG(vfu_virtio, "%s: indirect desc %p, desc size %u, req %p\n", + dev->name, desc_table, desc_table_size, req); + } else { + desc = current_desc; + } + + assert(req->iovcnt == 0); + /* Map descs to IOVs */ + new_idx = last_avail_idx; + while (1) { + assert(desc != NULL); + if (spdk_unlikely(req->iovcnt == VIRTIO_DEV_MAX_IOVS)) { + SPDK_ERRLOG("Max IOVs in request reached (iovcnt = %d).\n", req->iovcnt); + return -EINVAL; + } + + if (spdk_unlikely(!virtio_vring_packed_desc_to_iov(dev, desc, virtio_req_to_sg_t(req, req->iovcnt), + &req->iovs[req->iovcnt]))) { + SPDK_ERRLOG("Map Desc to IOV failed (iovcnt = %d).\n", req->iovcnt); + return -EINVAL; + } + req->desc_writeable[req->iovcnt] = false; + if (virtio_vring_packed_desc_is_wr(desc)) { + req->desc_writeable[req->iovcnt] = true; + } + + req->iovcnt++; + len += desc->len; + + /* get next desc */ + if (desc_table) { + if (req->iovcnt < desc_table_size) { + desc = &desc_table[req->iovcnt]; + } else { + desc = NULL; + } + } else { + if ((desc->flags & 
VRING_DESC_F_NEXT) == 0) { + req->buffer_id = desc->id; + desc = NULL; + } else { + new_idx = (new_idx + 1) % vq->qsize; + desc = &vq->desc.desc_packed[new_idx]; + num_descs++; + req->buffer_id = desc->id; + } + } + + if (desc == NULL) { + break; + } + } + + req->num_descs = num_descs; + vq->last_avail_idx = (new_idx + 1) % vq->qsize; + if (vq->last_avail_idx < last_avail_idx) { + vq->packed.avail_phase = !vq->packed.avail_phase; + } + + req->payload_size = len; + + SPDK_DEBUGLOG(vfu_virtio, "%s: req %p, iovcnt %u, num_descs %u\n", + dev->name, req, req->iovcnt, num_descs); + return 0; +} + +int +vfu_virito_dev_process_packed_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + struct vring_packed_desc *desc; + int ret; + struct vfu_virtio_req *req; + uint16_t i, max_reqs; + + max_reqs = VIRTIO_DEV_VRING_MAX_REQS; + for (i = 0; i < max_reqs; i++) { + desc = &vq->desc.desc_packed[vq->last_avail_idx]; + if (!virtio_vring_packed_is_avail(desc, vq->packed.avail_phase)) { + return i; + } + + req = vfu_virtio_dev_get_req(virtio_endpoint, vq); + if (spdk_unlikely(!req)) { + SPDK_ERRLOG("Error to get request\n"); + /* TODO: address the error case */ + assert(false); + return -EIO; + } + + ret = virtio_dev_packed_iovs_setup(dev, vq, vq->last_avail_idx, desc, req); + if (spdk_unlikely(ret)) { + /* let the device to response the error */ + SPDK_ERRLOG("virtio_dev_packed_iovs_setup failed\n"); + } + + assert(virtio_endpoint->virtio_ops.exec_request); + virtio_endpoint->io_outstanding++; + virtio_endpoint->virtio_ops.exec_request(virtio_endpoint, vq, req); + } + + return i; +} + +struct vfu_virtio_req * +virito_dev_packed_ring_get_next_avail_req(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + struct vring_packed_desc *desc; + int ret; + struct vfu_virtio_req *req; + + desc = &vq->desc.desc_packed[vq->last_avail_idx]; + if (!virtio_vring_packed_is_avail(desc, vq->packed.avail_phase)) { + return NULL; + } + + SPDK_DEBUGLOG(vfu_virtio, "%s: get 1 descriptors\n", dev->name); + + req = vfu_virtio_dev_get_req(virtio_endpoint, vq); + if (!req) { + SPDK_ERRLOG("Error to get request\n"); + return NULL; + } + + ret = virtio_dev_packed_iovs_setup(dev, vq, vq->last_avail_idx, desc, req); + if (ret) { + SPDK_ERRLOG("virtio_dev_packed_iovs_setup failed\n"); + vfu_virtio_dev_put_req(req); + return NULL; + } + + return req; +} + +static int +virtio_vfu_pci_common_cfg(struct vfu_virtio_endpoint *virtio_endpoint, char *buf, + size_t count, loff_t pos, bool is_write) +{ + struct vfu_virtio_dev *dev = virtio_endpoint->dev; + uint32_t offset, value = 0; + int ret; + + assert(count <= 4); + offset = pos - VIRTIO_PCI_COMMON_CFG_OFFSET; + + if (is_write) { + memcpy(&value, buf, count); + switch (offset) { + case VIRTIO_PCI_COMMON_DFSELECT: + dev->cfg.host_feature_select = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_DFSELECT with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_GFSELECT: + dev->cfg.guest_feature_select = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GFSELECT with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_GF: + assert(dev->cfg.guest_feature_select <= 1); + if (dev->cfg.guest_feature_select) { + dev->cfg.guest_feat_hi = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GF_HI with 0x%x\n", + 
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + } else { + dev->cfg.guest_feat_lo = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GF_LO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + } + + ret = virtio_dev_set_features(dev, + (((uint64_t)dev->cfg.guest_feat_hi << 32) | dev->cfg.guest_feat_lo)); + if (ret) { + return ret; + } + break; + case VIRTIO_PCI_COMMON_MSIX: + dev->cfg.msix_config = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_MSIX with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_STATUS: + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_STATUS with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + ret = virtio_dev_set_status(dev, value); + if (ret) { + return ret; + } + break; + case VIRTIO_PCI_COMMON_Q_SELECT: + if (value < VIRTIO_DEV_MAX_VQS) { + dev->cfg.queue_select = value; + } + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_SELECT with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_Q_SIZE: + dev->vqs[dev->cfg.queue_select].qsize = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_SIZE with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_Q_MSIX: + dev->vqs[dev->cfg.queue_select].vector = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_MSIX with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_ENABLE with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + if (value == 1) { + ret = virtio_dev_enable_vq(dev, dev->cfg.queue_select); + if (ret) { + return ret; + } + } else { + ret = virtio_dev_disable_vq(dev, dev->cfg.queue_select); + if (ret) { + return ret; + } + } + break; + case VIRTIO_PCI_COMMON_Q_DESCLO: + dev->vqs[dev->cfg.queue_select].desc_lo = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_DESCLO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_DESCHI: + dev->vqs[dev->cfg.queue_select].desc_hi = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_DESCHI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_AVAILLO: + dev->vqs[dev->cfg.queue_select].avail_lo = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_AVAILLO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_AVAILHI: + dev->vqs[dev->cfg.queue_select].avail_hi = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_AVAILHI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_USEDLO: + dev->vqs[dev->cfg.queue_select].used_lo = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_USEDLO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_USEDHI: + dev->vqs[dev->cfg.queue_select].used_hi = value; + SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_USEDHI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + + 
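/* All remaining common-config registers (device features, number of + * queues, config generation, queue notify offset) are read-only, + * so any other write offset is rejected below. + */ + 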
default: + SPDK_ERRLOG("%s: WRITE UNSUPPORTED offset 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), offset); + errno = EIO; + return -1; + } + } else { + switch (offset) { + case VIRTIO_PCI_COMMON_DFSELECT: + value = dev->cfg.host_feature_select; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DFSELECT with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_DF: + assert(dev->cfg.host_feature_select <= 1); + if (dev->cfg.host_feature_select) { + value = dev->host_features >> 32; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DF_HI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + } else { + value = dev->host_features; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DF_LO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + } + break; + case VIRTIO_PCI_COMMON_GFSELECT: + value = dev->cfg.guest_feature_select; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GFSELECT with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_GF: + assert(dev->cfg.guest_feature_select <= 1); + if (dev->cfg.guest_feature_select) { + value = dev->cfg.guest_feat_hi; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GF_HI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + } else { + value = dev->cfg.guest_feat_lo; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GF_LO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + } + break; + case VIRTIO_PCI_COMMON_MSIX: + value = dev->cfg.msix_config; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_MSIX with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_NUMQ: + value = dev->num_queues; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_NUMQ with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_STATUS: + value = dev->cfg.device_status; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_STATUS with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_CFGGENERATION: + value = dev->cfg.config_generation; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_CFGGENERATION with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_Q_NOFF: + value = dev->cfg.queue_select; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_Q_NOFF with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_Q_SELECT: + value = dev->cfg.queue_select; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_Q_SELECT with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + value); + break; + case VIRTIO_PCI_COMMON_Q_SIZE: + value = dev->vqs[dev->cfg.queue_select].qsize; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_SIZE with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_MSIX: + value = dev->vqs[dev->cfg.queue_select].vector; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_MSIX with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: + value = dev->vqs[dev->cfg.queue_select].enabled; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_ENABLE with 0x%x\n", + 
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_DESCLO: + value = dev->vqs[dev->cfg.queue_select].desc_lo; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_DESCLO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_DESCHI: + value = dev->vqs[dev->cfg.queue_select].desc_hi; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_DESCHI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_AVAILLO: + value = dev->vqs[dev->cfg.queue_select].avail_lo; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_AVAILLO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_AVAILHI: + value = dev->vqs[dev->cfg.queue_select].avail_hi; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_AVAILHI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_USEDLO: + value = dev->vqs[dev->cfg.queue_select].used_lo; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_USEDLO with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + case VIRTIO_PCI_COMMON_Q_USEDHI: + value = dev->vqs[dev->cfg.queue_select].used_hi; + SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_USEDHI with 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value); + break; + default: + SPDK_ERRLOG("%s: READ UNSUPPORTED offset 0x%x\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), offset); + errno = EIO; + return -1; + } + memcpy(buf, &value, count); + } + + return count; +} + +static int +virtio_vfu_device_specific_cfg(struct vfu_virtio_endpoint *virtio_endpoint, char *buf, + size_t count, loff_t pos, bool is_write) +{ + loff_t offset; + int ret = -1; + + assert(count <= 8); + offset = pos - VIRTIO_PCI_SPECIFIC_CFG_OFFSET; + if (!is_write) { + if (virtio_endpoint->virtio_ops.get_config) { + ret = virtio_endpoint->virtio_ops.get_config(virtio_endpoint, buf, offset, count); + } + } else { + if (virtio_endpoint->virtio_ops.set_config) { + ret = virtio_endpoint->virtio_ops.set_config(virtio_endpoint, buf, offset, count); + } + } + + if (ret < 0) { + return ret; + } + + return count; +} + +static int +virtio_vfu_pci_isr(struct vfu_virtio_endpoint *virtio_endpoint, char *buf, + size_t count, bool is_write) +{ + uint8_t *isr; + + if (count != 1) { + SPDK_ERRLOG("ISR register is 1 byte\n"); + errno = EIO; + return -1; + } + + isr = buf; + + if (!is_write) { + SPDK_DEBUGLOG(vfu_virtio, "READ PCI ISR\n"); + /* Read-Acknowledge Clear */ + *isr = virtio_endpoint->dev->cfg.isr; + virtio_endpoint->dev->cfg.isr = 0; + } else { + SPDK_ERRLOG("ISR register is RO\n"); + errno = EIO; + return -1; + } + + return count; +} + +static ssize_t +virtio_vfu_access_bar4(vfu_ctx_t *vfu_ctx, char *buf, size_t count, + loff_t pos, + bool is_write) +{ + struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx); + struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint); + uint64_t start, end; + + start = pos; + end = start + count; + SPDK_DEBUGLOG(vfu_virtio, "%s: %s bar4 0x%"PRIX64"-0x%"PRIX64", len = %lu\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + is_write ? 
"write" : "read", start, end - 1, count); + + if (end < VIRTIO_PCI_COMMON_CFG_OFFSET + VIRTIO_PCI_COMMON_CFG_LENGTH) { + /* virtio PCI common configuration */ + return virtio_vfu_pci_common_cfg(virtio_endpoint, buf, count, pos, is_write); + } else if (start >= VIRTIO_PCI_ISR_ACCESS_OFFSET && + end < VIRTIO_PCI_ISR_ACCESS_OFFSET + VIRTIO_PCI_ISR_ACCESS_LENGTH) { + /* ISR access */ + return virtio_vfu_pci_isr(virtio_endpoint, buf, count, is_write); + } else if (start >= VIRTIO_PCI_SPECIFIC_CFG_OFFSET && + end < VIRTIO_PCI_SPECIFIC_CFG_OFFSET + VIRTIO_PCI_SPECIFIC_CFG_LENGTH) { + /* Device specific configuration */ + return virtio_vfu_device_specific_cfg(virtio_endpoint, buf, count, pos, is_write); + } else if (start >= VIRTIO_PCI_NOTIFICATIONS_OFFSET && + end < VIRTIO_PCI_NOTIFICATIONS_OFFSET + VIRTIO_PCI_NOTIFICATIONS_LENGTH) { + /* Notifications */ + /* Sparse mmap region by default, there are no MMIO R/W messages */ + assert(false); + return count; + } else { + assert(false); + } + + return 0; +} + +int +vfu_virtio_post_memory_add(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end) +{ + struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint); + struct vfu_virtio_dev *dev = virtio_endpoint->dev; + uint32_t i; + + if (!dev) { + return 0; + } + + for (i = 0; i < dev->num_queues; i++) { + /* Try to remap VQs if necessary */ + virtio_dev_map_vq(dev, &dev->vqs[i]); + } + + return 0; +} + +int +vfu_virtio_pre_memory_remove(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end) +{ + struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint); + + if (virtio_endpoint->dev != NULL) { + vfu_virtio_dev_unmap_vqs(virtio_endpoint->dev, map_start, map_end); + } + + return 0; +} + +int +vfu_virtio_pci_reset_cb(struct spdk_vfu_endpoint *endpoint) +{ + struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint); + + if (virtio_endpoint->dev) { + vfu_virtio_dev_stop(virtio_endpoint->dev); + vfu_virtio_dev_reset(virtio_endpoint->dev); + } + + return 0; +} + +static ssize_t +access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, + bool is_write) +{ + struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx); + void *pci_config = spdk_vfu_endpoint_get_pci_config(endpoint); + + SPDK_DEBUGLOG(vfu_virtio, + "%s: PCI_CFG %s %#lx-%#lx\n", + spdk_vfu_get_endpoint_id(endpoint), is_write ? 
"write" : "read", + offset, offset + count); + + if (is_write) { + SPDK_ERRLOG("write %#lx-%#lx not supported\n", + offset, offset + count); + errno = EINVAL; + return -1; + } + + if (offset + count > 0x1000) { + SPDK_ERRLOG("access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n", + offset, count, 0x1000); + errno = ERANGE; + return -1; + } + + memcpy(buf, ((unsigned char *)pci_config) + offset, count); + return count; +} + +static int +vfu_virtio_dev_start(struct vfu_virtio_dev *dev) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + int ret = 0; + + SPDK_DEBUGLOG(vfu_virtio, "start %s\n", dev->name); + + if (virtio_dev_is_started(dev)) { + SPDK_ERRLOG("Device %s is already started\n", dev->name); + return -EFAULT; + } + + if (virtio_endpoint->virtio_ops.start_device) { + virtio_endpoint->io_outstanding = 0; + ret = virtio_endpoint->virtio_ops.start_device(virtio_endpoint); + } + + SPDK_DEBUGLOG(vfu_virtio, "%s is started with ret %d\n", dev->name, ret); + + return ret; +} + +static int +vfu_virtio_dev_stop(struct vfu_virtio_dev *dev) +{ + struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint; + int ret = 0; + + SPDK_DEBUGLOG(vfu_virtio, "stop %s\n", dev->name); + + if (!virtio_dev_is_started(dev)) { + SPDK_DEBUGLOG(vfu_virtio, "%s isn't started\n", dev->name); + return 0; + } + + if (virtio_endpoint->virtio_ops.stop_device) { + ret = virtio_endpoint->virtio_ops.stop_device(virtio_endpoint); + assert(ret == 0); + } + + /* Unmap all VQs */ + vfu_virtio_dev_unmap_vqs(dev, NULL, NULL); + + return ret; +} + +int +vfu_virtio_detach_device(struct spdk_vfu_endpoint *endpoint) +{ + struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint); + struct vfu_virtio_dev *dev = virtio_endpoint->dev; + + if (virtio_endpoint->dev == NULL) { + return 0; + } + + SPDK_DEBUGLOG(vfu_virtio, "detach device %s\n", dev->name); + + vfu_virtio_dev_stop(dev); + vfu_virtio_dev_free_reqs(virtio_endpoint, dev); + virtio_endpoint->dev = NULL; + free(dev); + + return 0; +} + +int +vfu_virtio_attach_device(struct spdk_vfu_endpoint *endpoint) +{ + struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint); + uint64_t supported_features = 0; + struct vfu_virtio_dev *dev; + struct vfu_virtio_vq *vq; + struct vfu_virtio_req *req; + uint32_t i, j; + int ret = 0; + + dev = calloc(1, sizeof(*dev) + virtio_endpoint->num_queues * 3 * dma_sg_size()); + if (dev == NULL) { + return -ENOMEM; + } + + dev->num_queues = virtio_endpoint->num_queues; + for (i = 0; i < dev->num_queues; i++) { + vq = &dev->vqs[i]; + vq->id = i; + vq->qsize = virtio_endpoint->qsize; + vq->avail.sg = (dma_sg_t *)(dev->sg + i * dma_sg_size() * 3); + vq->used.sg = (dma_sg_t *)((uint8_t *)vq->avail.sg + dma_sg_size()); + vq->desc.sg = (dma_sg_t *)((uint8_t *)vq->used.sg + dma_sg_size()); + + STAILQ_INIT(&vq->free_reqs); + for (j = 0; j <= vq->qsize; j++) { + req = vfu_virtio_vq_alloc_req(virtio_endpoint, vq); + if (!req) { + SPDK_ERRLOG("Error to allocate req\n"); + ret = -ENOMEM; + goto out; + } + req->indirect_iov = &req->iovs[VIRTIO_DEV_MAX_IOVS]; + req->indirect_sg = virtio_req_to_sg_t(req, VIRTIO_DEV_MAX_IOVS); + req->dev = dev; + req->vq = vq; + STAILQ_INSERT_TAIL(&vq->free_reqs, req, link); + } + } + + if (virtio_endpoint->virtio_ops.get_device_features) { + supported_features = virtio_endpoint->virtio_ops.get_device_features(virtio_endpoint); + } + dev->host_features = supported_features; + + snprintf(dev->name, SPDK_VFU_MAX_NAME_LEN, "%s", + 
spdk_vfu_get_endpoint_name(virtio_endpoint->endpoint)); + virtio_endpoint->dev = dev; + dev->virtio_endpoint = virtio_endpoint; + virtio_endpoint->thread = spdk_get_thread(); + return 0; + +out: + vfu_virtio_dev_free_reqs(virtio_endpoint, dev); + return ret; +} + +int +vfu_virtio_endpoint_setup(struct vfu_virtio_endpoint *virtio_endpoint, + struct spdk_vfu_endpoint *endpoint, + char *basename, const char *endpoint_name, + struct vfu_virtio_ops *ops) +{ + char path[PATH_MAX] = ""; + int ret; + + if (!ops) { + return -EINVAL; + } + + ret = snprintf(path, PATH_MAX, "%s%s_bar4", basename, endpoint_name); + if (ret < 0 || ret >= PATH_MAX) { + SPDK_ERRLOG("%s: error to get socket path: %s.\n", basename, spdk_strerror(errno)); + return -EINVAL; + } + + ret = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (ret == -1) { + SPDK_ERRLOG("%s: failed to open device memory at %s.\n", + path, spdk_strerror(errno)); + return ret; + } + unlink(path); + + virtio_endpoint->devmem_fd = ret; + ret = ftruncate(virtio_endpoint->devmem_fd, VIRTIO_PCI_BAR4_LENGTH); + if (ret != 0) { + SPDK_ERRLOG("%s: error to ftruncate file %s.\n", path, + spdk_strerror(errno)); + close(virtio_endpoint->devmem_fd); + return ret; + } + + virtio_endpoint->doorbells = mmap(NULL, VIRTIO_PCI_NOTIFICATIONS_LENGTH, PROT_READ | PROT_WRITE, + MAP_SHARED, + virtio_endpoint->devmem_fd, VIRTIO_PCI_NOTIFICATIONS_OFFSET); + if (virtio_endpoint->doorbells == MAP_FAILED) { + SPDK_ERRLOG("%s: error to mmap file %s.\n", path, spdk_strerror(errno)); + close(virtio_endpoint->devmem_fd); + return -EFAULT; + } + virtio_endpoint->endpoint = endpoint; + virtio_endpoint->virtio_ops = *ops; + virtio_endpoint->num_queues = VIRTIO_DEV_MAX_VQS; + virtio_endpoint->qsize = VIRTIO_VQ_DEFAULT_SIZE; + + SPDK_DEBUGLOG(vfu_virtio, "mmap file %s, devmem_fd %d\n", path, virtio_endpoint->devmem_fd); + return 0; +} + +int +vfu_virtio_endpoint_destruct(struct vfu_virtio_endpoint *virtio_endpoint) +{ + if (virtio_endpoint->doorbells) { + munmap((void *)virtio_endpoint->doorbells, VIRTIO_PCI_NOTIFICATIONS_LENGTH); + } + + if (virtio_endpoint->devmem_fd) { + close(virtio_endpoint->devmem_fd); + } + + return 0; +} + +static int +vfu_virtio_quiesce_poll(void *ctx) +{ + struct vfu_virtio_endpoint *virtio_endpoint = ctx; + vfu_ctx_t *vfu_ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint); + + if (virtio_endpoint->io_outstanding) { + return SPDK_POLLER_IDLE; + } + + spdk_poller_unregister(&virtio_endpoint->quiesce_poller); + virtio_endpoint->quiesce_in_progress = false; + vfu_device_quiesced(vfu_ctx, 0); + + return SPDK_POLLER_BUSY; +} + +int +vfu_virtio_quiesce_cb(struct spdk_vfu_endpoint *endpoint) +{ + struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint); + + if (virtio_endpoint->quiesce_in_progress) { + return -EBUSY; + } + + if (!virtio_endpoint->io_outstanding) { + return 0; + } + + virtio_endpoint->quiesce_in_progress = true; + virtio_endpoint->quiesce_poller = SPDK_POLLER_REGISTER(vfu_virtio_quiesce_poll, virtio_endpoint, + 10); + + return -EBUSY; +} + +static struct spdk_vfu_pci_device vfu_virtio_device_info = { + .id = { + .vid = SPDK_PCI_VID_VIRTIO, + /* Realize when calling get device information */ + .did = 0x0, + .ssvid = SPDK_PCI_VID_VIRTIO, + .ssid = 0x0, + }, + + .class = { + /* 0x01, mass storage controller */ + .bcc = 0x01, + /* 0x00, SCSI controller */ + .scc = 0x00, + /* 0x00, SCSI controller - vendor specific interface */ + .pi = 0x00, + }, + + .pmcap = { + .hdr.id = PCI_CAP_ID_PM, + .pmcs.nsfrst = 0x1, + }, + + 
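/* PCI Express capability: advertise function level reset (flrc = 1) + * so the guest can reset this device without a bus-level reset. + */ + 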
.pxcap = { + .hdr.id = PCI_CAP_ID_EXP, + .pxcaps.ver = 0x2, + .pxdcap = {.rer = 0x1, .flrc = 0x1}, + .pxdcap2.ctds = 0x1, + }, + + .msixcap = { + .hdr.id = PCI_CAP_ID_MSIX, + .mxc.ts = VIRTIO_DEV_MAX_VQS - 1, + .mtab = {.tbir = 0x1, .to = 0x0}, + .mpba = {.pbir = 0x2, .pbao = 0x0}, + }, + + .nr_vendor_caps = 4, + + .intr_ipin = 0x1, + .nr_int_irqs = 0x1, + .nr_msix_irqs = VIRTIO_DEV_MAX_VQS, + + .regions = { + /* BAR0 */ + {0}, + /* BAR1 */ + { + .access_cb = NULL, + .offset = 0, + .fd = -1, + .len = 0x1000, + .flags = VFU_REGION_FLAG_RW, + .nr_sparse_mmaps = 0, + }, + /* BAR2 */ + { + .access_cb = NULL, + .offset = 0, + .fd = -1, + .len = 0x1000, + .flags = VFU_REGION_FLAG_RW, + .nr_sparse_mmaps = 0, + }, + /* BAR3 */ + {0}, + /* BAR4 */ + { + .access_cb = virtio_vfu_access_bar4, + .offset = 0, + .fd = -1, + .len = VIRTIO_PCI_BAR4_LENGTH, + .flags = VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, + .nr_sparse_mmaps = 1, + .mmaps = { + { + .offset = VIRTIO_PCI_NOTIFICATIONS_OFFSET, + .len = VIRTIO_PCI_NOTIFICATIONS_LENGTH, + }, + }, + }, + /* BAR5 */ + {0}, + /* BAR6 */ + {0}, + /* ROM */ + {0}, + /* PCI Config */ + { + .access_cb = access_pci_config, + .offset = 0, + .fd = -1, + .len = 0x1000, + .flags = VFU_REGION_FLAG_RW, + .nr_sparse_mmaps = 0, + }, + }, +}; + +void +vfu_virtio_get_device_info(struct vfu_virtio_endpoint *virtio_endpoint, + struct spdk_vfu_pci_device *device_info) +{ + memcpy(device_info, &vfu_virtio_device_info, sizeof(*device_info)); + + /* BAR4 Region FD */ + device_info->regions[VFU_PCI_DEV_BAR4_REGION_IDX].fd = virtio_endpoint->devmem_fd; + SPDK_DEBUGLOG(vfu_virtio, "%s: get device information, fd %d\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + virtio_endpoint->devmem_fd); +} + +static struct virtio_pci_cap common_cap = { + .cap_vndr = PCI_CAP_ID_VNDR, + .cap_len = sizeof(common_cap), + .cfg_type = VIRTIO_PCI_CAP_COMMON_CFG, + .bar = 4, + .offset = VIRTIO_PCI_COMMON_CFG_OFFSET, + .length = VIRTIO_PCI_COMMON_CFG_LENGTH, +}; + +static struct virtio_pci_cap isr_cap = { + .cap_vndr = PCI_CAP_ID_VNDR, + .cap_len = sizeof(isr_cap), + .cfg_type = VIRTIO_PCI_CAP_ISR_CFG, + .bar = 4, + .offset = VIRTIO_PCI_ISR_ACCESS_OFFSET, + .length = VIRTIO_PCI_ISR_ACCESS_LENGTH, +}; + +static struct virtio_pci_cap dev_cap = { + .cap_vndr = PCI_CAP_ID_VNDR, + .cap_len = sizeof(dev_cap), + .cfg_type = VIRTIO_PCI_CAP_DEVICE_CFG, + .bar = 4, + .offset = VIRTIO_PCI_SPECIFIC_CFG_OFFSET, + .length = VIRTIO_PCI_SPECIFIC_CFG_LENGTH, +}; + +static struct virtio_pci_notify_cap notify_cap = { + .cap = { + .cap_vndr = PCI_CAP_ID_VNDR, + .cap_len = sizeof(notify_cap), + .cfg_type = VIRTIO_PCI_CAP_NOTIFY_CFG, + .bar = 4, + .offset = VIRTIO_PCI_NOTIFICATIONS_OFFSET, + .length = VIRTIO_PCI_NOTIFICATIONS_LENGTH, + }, + .notify_off_multiplier = 4, +}; + +uint16_t +vfu_virtio_get_vendor_capability(struct spdk_vfu_endpoint *endpoint, char *buf, + uint16_t buf_len, + uint16_t idx) +{ + uint16_t len; + + SPDK_DEBUGLOG(vfu_virtio, "%s: get vendor capability, idx %u\n", + spdk_vfu_get_endpoint_id(endpoint), idx); + + switch (idx) { + case 0: + assert(buf_len > sizeof(common_cap)); + memcpy(buf, &common_cap, sizeof(common_cap)); + len = sizeof(common_cap); + break; + case 1: + assert(buf_len > sizeof(isr_cap)); + memcpy(buf, &isr_cap, sizeof(isr_cap)); + len = sizeof(isr_cap); + break; + case 2: + assert(buf_len > sizeof(dev_cap)); + memcpy(buf, &dev_cap, sizeof(dev_cap)); + len = sizeof(dev_cap); + break; + case 3: + assert(buf_len > sizeof(notify_cap)); + memcpy(buf, ¬ify_cap, sizeof(notify_cap)); + 
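/* notify_off_multiplier is 4, so a queue's doorbell lives at + * VIRTIO_PCI_NOTIFICATIONS_OFFSET + queue_notify_off * 4 in BAR4. + */ + 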
len = sizeof(notify_cap); + break; + default: + return 0; + } + + return len; +} + +SPDK_LOG_REGISTER_COMPONENT(vfu_virtio) diff --git a/module/vfu_device/vfu_virtio_blk.c b/module/vfu_device/vfu_virtio_blk.c new file mode 100644 index 000000000..a72e1336d --- /dev/null +++ b/module/vfu_device/vfu_virtio_blk.c @@ -0,0 +1,615 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. + */ + +/* + * virtio-blk over vfio-user transport + */ +#include + +#include "spdk/env.h" +#include "spdk/bdev.h" +#include "spdk/bdev_module.h" +#include "spdk/stdinc.h" +#include "spdk/assert.h" +#include "spdk/barrier.h" +#include "spdk/thread.h" +#include "spdk/memory.h" +#include "spdk/util.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/likely.h" +#include "spdk/pci_ids.h" + +#include "vfu_virtio_internal.h" + +#define VIRTIO_BLK_SUPPORTED_FEATURES ((1ULL << VIRTIO_BLK_F_SIZE_MAX) | \ + (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ + (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \ + (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ + (1ULL << VIRTIO_BLK_F_MQ)) + +struct virtio_blk_endpoint { + struct vfu_virtio_endpoint virtio; + + /* virtio_blk specific configurations */ + struct spdk_thread *init_thread; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *io_channel; + struct virtio_blk_config blk_cfg; + + /* virtio_blk ring process poller */ + struct spdk_poller *ring_poller; +}; + +struct virtio_blk_req { + volatile uint8_t *status; + struct virtio_blk_endpoint *endpoint; + /* KEEP req at last */ + struct vfu_virtio_req req; +}; + +static inline struct virtio_blk_endpoint * +to_blk_endpoint(struct vfu_virtio_endpoint *virtio_endpoint) +{ + return SPDK_CONTAINEROF(virtio_endpoint, struct virtio_blk_endpoint, virtio); +} + +static inline struct virtio_blk_req * +to_blk_request(struct vfu_virtio_req *request) +{ + return SPDK_CONTAINEROF(request, struct virtio_blk_req, req); +} + +static int +vfu_virtio_blk_vring_poll(void *ctx) +{ + struct virtio_blk_endpoint *blk_endpoint = ctx; + struct vfu_virtio_dev *dev = blk_endpoint->virtio.dev; + struct vfu_virtio_vq *vq; + uint32_t i, count = 0; + + if (spdk_unlikely(!virtio_dev_is_started(dev))) { + return SPDK_POLLER_IDLE; + } + + if (spdk_unlikely(blk_endpoint->virtio.quiesce_in_progress)) { + return SPDK_POLLER_IDLE; + } + + for (i = 0; i < dev->num_queues; i++) { + vq = &dev->vqs[i]; + if (!vq->enabled || vq->q_state != VFU_VQ_ACTIVE) { + continue; + } + + vfu_virtio_vq_flush_irq(dev, vq); + + if (vq->packed.packed_ring) { + /* packed vring */ + count += vfu_virito_dev_process_packed_ring(dev, vq); + } else { + /* split vring */ + count += vfu_virito_dev_process_split_ring(dev, vq); + } + } + + return count ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; +} + +static int +virtio_blk_start(struct vfu_virtio_endpoint *virtio_endpoint) +{ + struct virtio_blk_endpoint *blk_endpoint = to_blk_endpoint(virtio_endpoint); + + if (blk_endpoint->ring_poller) { + return 0; + } + + SPDK_DEBUGLOG(vfu_virtio_blk, "starting %s\n", virtio_endpoint->dev->name); + blk_endpoint->io_channel = spdk_bdev_get_io_channel(blk_endpoint->bdev_desc); + blk_endpoint->ring_poller = SPDK_POLLER_REGISTER(vfu_virtio_blk_vring_poll, blk_endpoint, 0); + + return 0; +} + +static void +_virtio_blk_stop_msg(void *ctx) +{ + struct virtio_blk_endpoint *blk_endpoint = ctx; + + spdk_poller_unregister(&blk_endpoint->ring_poller); + spdk_put_io_channel(blk_endpoint->io_channel); + blk_endpoint->io_channel = NULL; + + SPDK_DEBUGLOG(vfu_virtio_blk, "%s is stopped\n", + spdk_vfu_get_endpoint_id(blk_endpoint->virtio.endpoint)); +} + +static int +virtio_blk_stop(struct vfu_virtio_endpoint *virtio_endpoint) +{ + struct virtio_blk_endpoint *blk_endpoint = to_blk_endpoint(virtio_endpoint); + + if (!blk_endpoint->io_channel) { + return 0; + } + + SPDK_DEBUGLOG(vfu_virtio_blk, "%s stopping\n", spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint)); + spdk_thread_send_msg(virtio_endpoint->thread, _virtio_blk_stop_msg, blk_endpoint); + return 0; +} + +static void +virtio_blk_req_finish(struct virtio_blk_req *blk_req, uint8_t status) +{ + struct vfu_virtio_req *req = &blk_req->req; + + if (spdk_likely(blk_req->status)) { + *blk_req->status = status; + blk_req->status = NULL; + } + + vfu_virtio_finish_req(req); +} + +static void +blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct virtio_blk_req *blk_req = cb_arg; + + SPDK_DEBUGLOG(vfu_virtio_blk, "IO done status %u\n", success); + + spdk_bdev_free_io(bdev_io); + virtio_blk_req_finish(blk_req, success ? 
VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); +} + +static int +virtio_blk_process_req(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq, + struct vfu_virtio_req *req) +{ + struct virtio_blk_endpoint *blk_endpoint = to_blk_endpoint(virtio_endpoint); + struct virtio_blk_req *blk_req = to_blk_request(req); + const struct virtio_blk_outhdr *hdr; + struct virtio_blk_discard_write_zeroes *desc; + struct iovec *iov; + uint16_t iovcnt; + uint64_t flush_bytes; + uint32_t type; + uint32_t payload_len; + int ret; + + blk_req->endpoint = blk_endpoint; + + iov = &req->iovs[0]; + if (spdk_unlikely(iov->iov_len != sizeof(*hdr))) { + SPDK_ERRLOG("Invalid virtio_blk header length"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_UNSUPP); + return -EINVAL; + } + hdr = iov->iov_base; + + iov = &req->iovs[req->iovcnt - 1]; + if (spdk_unlikely(iov->iov_len != 1)) { + SPDK_ERRLOG("Invalid virtio_blk response length"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_UNSUPP); + return -EINVAL; + } + blk_req->status = iov->iov_base; + + payload_len = req->payload_size; + payload_len -= sizeof(*hdr) + 1; + iovcnt = req->iovcnt - 2; + + type = hdr->type; + /* Legacy type isn't supported */ + type &= ~VIRTIO_BLK_T_BARRIER; + + SPDK_DEBUGLOG(vfu_virtio_blk, "%s: type %u, iovcnt %u, payload_len %u\n", + spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), + type, iovcnt, payload_len); + + if (spdk_unlikely(blk_endpoint->bdev_desc == NULL)) { + SPDK_ERRLOG("Bdev has been removed\n"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return 0; + } + + switch (type) { + case VIRTIO_BLK_T_IN: + case VIRTIO_BLK_T_OUT: + if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) { + SPDK_ERRLOG("Invalid payload length %u\n", payload_len); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_UNSUPP); + return -EINVAL; + } + if (type == VIRTIO_BLK_T_IN) { + req->used_len = payload_len + 1; + ret = spdk_bdev_readv(blk_endpoint->bdev_desc, blk_endpoint->io_channel, + &req->iovs[1], iovcnt, hdr->sector * 512, + payload_len, blk_request_complete_cb, blk_req); + } else { + req->used_len = 1; + ret = spdk_bdev_writev(blk_endpoint->bdev_desc, blk_endpoint->io_channel, + &req->iovs[1], iovcnt, hdr->sector * 512, + payload_len, blk_request_complete_cb, blk_req); + } + if (ret) { + SPDK_ERRLOG("R/W error\n"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return ret; + } + break; + case VIRTIO_BLK_T_DISCARD: + desc = req->iovs[1].iov_base; + if (payload_len != sizeof(*desc)) { + SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return -EINVAL; + } + + if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { + SPDK_ERRLOG("UNMAP flag is only used for WRITE ZEROES command\n"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_UNSUPP); + return -EINVAL; + } + + ret = spdk_bdev_unmap(blk_endpoint->bdev_desc, blk_endpoint->io_channel, + desc->sector * 512, desc->num_sectors * 512, + blk_request_complete_cb, blk_req); + if (ret) { + SPDK_ERRLOG("UNMAP error\n"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return ret; + } + break; + case VIRTIO_BLK_T_WRITE_ZEROES: + desc = req->iovs[1].iov_base; + if (payload_len != sizeof(*desc)) { + SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return -1; + } + + /* Unmap this range, SPDK doesn't support it, kernel will enable this flag by default + * without checking unmap feature is negotiated or not, the flag isn't 
mandatory, so + * just print a warning. + */ + if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { + SPDK_WARNLOG("Ignore the unmap flag for WRITE ZEROES from %"PRIx64", len %"PRIx64"\n", + (uint64_t)desc->sector * 512, (uint64_t)desc->num_sectors * 512); + } + + ret = spdk_bdev_write_zeroes(blk_endpoint->bdev_desc, blk_endpoint->io_channel, + desc->sector * 512, desc->num_sectors * 512, + blk_request_complete_cb, blk_req); + if (ret) { + SPDK_ERRLOG("WRITE ZEROES error\n"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return ret; + } + break; + case VIRTIO_BLK_T_FLUSH: + flush_bytes = spdk_bdev_get_num_blocks(blk_endpoint->bdev) * spdk_bdev_get_block_size( + blk_endpoint->bdev); + if (hdr->sector != 0) { + SPDK_NOTICELOG("sector must be zero for flush command\n"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return -EINVAL; + } + ret = spdk_bdev_flush(blk_endpoint->bdev_desc, blk_endpoint->io_channel, + 0, flush_bytes, + blk_request_complete_cb, blk_req); + if (ret) { + SPDK_ERRLOG("FLUSH error\n"); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_IOERR); + return ret; + } + break; + case VIRTIO_BLK_T_GET_ID: + if (!iovcnt || !payload_len) { + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_UNSUPP); + return -EINVAL; + } + req->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, req->iovs[1].iov_len); + spdk_strcpy_pad(req->iovs[1].iov_base, spdk_bdev_get_name(blk_endpoint->bdev), + req->used_len, ' '); + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_OK); + break; + default: + virtio_blk_req_finish(blk_req, VIRTIO_BLK_S_UNSUPP); + return -ENOTSUP; + } + + return 0; +} + +static void +virtio_blk_update_config(struct virtio_blk_config *blk_cfg, struct spdk_bdev *bdev, + uint16_t num_queues) +{ + memset(blk_cfg, 0, sizeof(*blk_cfg)); + + if (!bdev) { + return; + } + + blk_cfg->blk_size = spdk_bdev_get_block_size(bdev); + blk_cfg->capacity = (blk_cfg->blk_size * spdk_bdev_get_num_blocks(bdev)) / 512; + /* minimum I/O size in blocks */ + blk_cfg->min_io_size = 1; + blk_cfg->num_queues = num_queues; + + if (spdk_bdev_get_buf_align(bdev) > 1) { + blk_cfg->size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE; + blk_cfg->seg_max = spdk_min(VIRTIO_DEV_MAX_IOVS - 2 - 1, BDEV_IO_NUM_CHILD_IOV - 2 - 1); + } else { + blk_cfg->size_max = 131072; + /* -2 for REQ and RESP and -1 for region boundary splitting */ + blk_cfg->seg_max = VIRTIO_DEV_MAX_IOVS - 2 - 1; + } + + if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { + /* 16MiB, expressed in 512 Bytes */ + blk_cfg->max_discard_sectors = 32768; + blk_cfg->max_discard_seg = 1; + blk_cfg->discard_sector_alignment = blk_cfg->blk_size / 512; + } + if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { + blk_cfg->max_write_zeroes_sectors = 32768; + blk_cfg->max_write_zeroes_seg = 1; + } +} + +static void +_vfu_virtio_blk_bdev_close(void *arg1) +{ + struct spdk_bdev_desc *bdev_desc = arg1; + + spdk_bdev_close(bdev_desc); +} + +static void +bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, + void *event_ctx) +{ + struct virtio_blk_endpoint *blk_endpoint = event_ctx; + + SPDK_DEBUGLOG(vfu_virtio_blk, "Bdev event: type %d, name %s\n", type, bdev->name); + + switch (type) { + case SPDK_BDEV_EVENT_REMOVE: + SPDK_NOTICELOG("bdev name (%s) received event(SPDK_BDEV_EVENT_REMOVE)\n", bdev->name); + virtio_blk_update_config(&blk_endpoint->blk_cfg, NULL, 0); + + if (blk_endpoint->io_channel) { + spdk_thread_send_msg(blk_endpoint->virtio.thread, _virtio_blk_stop_msg, blk_endpoint); + } + + if (blk_endpoint->bdev_desc) { 
+			spdk_thread_send_msg(blk_endpoint->init_thread, _vfu_virtio_blk_bdev_close,
+					     blk_endpoint->bdev_desc);
+			blk_endpoint->bdev_desc = NULL;
+		}
+		break;
+	case SPDK_BDEV_EVENT_RESIZE:
+		SPDK_NOTICELOG("bdev name (%s) received event(SPDK_BDEV_EVENT_RESIZE)\n", bdev->name);
+		virtio_blk_update_config(&blk_endpoint->blk_cfg, blk_endpoint->bdev,
+					 blk_endpoint->virtio.num_queues);
+		vfu_virtio_notify_config(&blk_endpoint->virtio);
+		break;
+	default:
+		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
+		break;
+	}
+}
+
+static uint64_t
+virtio_blk_get_supported_features(struct vfu_virtio_endpoint *virtio_endpoint)
+{
+	struct virtio_blk_endpoint *blk_endpoint = to_blk_endpoint(virtio_endpoint);
+	uint64_t features;
+	struct spdk_bdev *bdev;
+
+	features = VIRTIO_BLK_SUPPORTED_FEATURES | VIRTIO_HOST_SUPPORTED_FEATURES;
+
+	if (!virtio_endpoint->packed_ring) {
+		features &= ~(1ULL << VIRTIO_F_RING_PACKED);
+	}
+	bdev = blk_endpoint->bdev;
+
+	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
+		features |= (1ULL << VIRTIO_BLK_F_DISCARD);
+	}
+
+	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
+		features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
+	}
+
+	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
+		features |= (1ULL << VIRTIO_BLK_F_FLUSH);
+	}
+
+	return features;
+}
+
+static int
+virtio_blk_get_device_specific_config(struct vfu_virtio_endpoint *virtio_endpoint, char *buf,
+				      uint64_t offset, uint64_t count)
+{
+	struct virtio_blk_endpoint *blk_endpoint = to_blk_endpoint(virtio_endpoint);
+	uint8_t *blk_cfg;
+	uint64_t len;
+
+	if (offset >= sizeof(struct virtio_blk_config)) {
+		return -EINVAL;
+	}
+	len = spdk_min(sizeof(struct virtio_blk_config) - offset, count);
+
+	blk_cfg = (uint8_t *)&blk_endpoint->blk_cfg;
+	memcpy(buf, blk_cfg + offset, len);
+
+	return 0;
+}
+
+static struct vfu_virtio_req *
+virtio_blk_alloc_req(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq)
+{
+	struct virtio_blk_req *blk_req;
+
+	blk_req = calloc(1, sizeof(*blk_req) + dma_sg_size() * (VIRTIO_DEV_MAX_IOVS + 1));
+	if (!blk_req) {
+		return NULL;
+	}
+
+	return &blk_req->req;
+}
+
+static void
+virtio_blk_free_req(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq,
+		    struct vfu_virtio_req *req)
+{
+	struct virtio_blk_req *blk_req = to_blk_request(req);
+
+	free(blk_req);
+}
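+
+/*
+ * Editorial sketch, not part of the original commit: the generic vfu_virtio
+ * layer never calls the virtio-blk code directly; everything funnels through
+ * the ops table registered just below. The guard macro is hypothetical and
+ * never defined, so this illustration is never compiled.
+ */
+#ifdef VFU_VIRTIO_OPS_EXAMPLE
+static void
+example_exec_one(struct vfu_virtio_endpoint *ep, struct vfu_virtio_vq *vq)
+{
+	/* the generic layer pops a request off the vring... */
+	struct vfu_virtio_req *req = vfu_virtio_vq_alloc_req(ep, vq);
+
+	/* ...and dispatches it; for this module exec_request is
+	 * virtio_blk_process_req(), whose completion path ends in
+	 * vfu_virtio_finish_req() and a used-ring update. */
+	ep->virtio_ops.exec_request(ep, vq, req);
+}
+#endif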
+struct vfu_virtio_ops virtio_blk_ops = {
+	.get_device_features = virtio_blk_get_supported_features,
+	.alloc_req = virtio_blk_alloc_req,
+	.free_req = virtio_blk_free_req,
+	.exec_request = virtio_blk_process_req,
+	.get_config = virtio_blk_get_device_specific_config,
+	.start_device = virtio_blk_start,
+	.stop_device = virtio_blk_stop,
+};
+
+int
+vfu_virtio_blk_add_bdev(const char *name, const char *bdev_name,
+			uint16_t num_queues, uint16_t qsize, bool packed_ring)
+{
+	struct spdk_vfu_endpoint *endpoint;
+	struct vfu_virtio_endpoint *virtio_endpoint;
+	struct virtio_blk_endpoint *blk_endpoint;
+	int ret;
+
+	endpoint = spdk_vfu_get_endpoint_by_name(name);
+	if (!endpoint) {
+		SPDK_ERRLOG("Endpoint %s doesn't exist\n", name);
+		return -ENOENT;
+	}
+
+	virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
+	blk_endpoint = to_blk_endpoint(virtio_endpoint);
+
+	if (blk_endpoint->bdev_desc) {
+		SPDK_ERRLOG("%s: block device already exists\n", spdk_vfu_get_endpoint_id(endpoint));
+		return -EEXIST;
+	}
+
+	if (num_queues && (num_queues <= VIRTIO_DEV_MAX_VQS)) {
+		blk_endpoint->virtio.num_queues = num_queues;
+	}
+	if (qsize && (qsize <= VIRTIO_VQ_MAX_SIZE)) {
+		blk_endpoint->virtio.qsize = qsize;
+	}
+	blk_endpoint->virtio.packed_ring = packed_ring;
+
+	SPDK_DEBUGLOG(vfu_virtio_blk, "%s: add block device %s, num_queues %u, qsize %u, packed ring %s\n",
+		      spdk_vfu_get_endpoint_id(endpoint),
+		      bdev_name, blk_endpoint->virtio.num_queues, blk_endpoint->virtio.qsize,
+		      packed_ring ? "enabled" : "disabled");
+
+	ret = spdk_bdev_open_ext(bdev_name, true, bdev_event_cb, blk_endpoint,
+				 &blk_endpoint->bdev_desc);
+	if (ret != 0) {
+		SPDK_ERRLOG("%s could not open bdev '%s', error=%d\n",
+			    name, bdev_name, ret);
+		return ret;
+	}
+	blk_endpoint->bdev = spdk_bdev_desc_get_bdev(blk_endpoint->bdev_desc);
+	virtio_blk_update_config(&blk_endpoint->blk_cfg, blk_endpoint->bdev,
+				 blk_endpoint->virtio.num_queues);
+	blk_endpoint->init_thread = spdk_get_thread();
+
+	return 0;
+}
+
+static int
+vfu_virtio_blk_endpoint_destruct(struct spdk_vfu_endpoint *endpoint)
+{
+	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
+	struct virtio_blk_endpoint *blk_endpoint = to_blk_endpoint(virtio_endpoint);
+
+	if (blk_endpoint->bdev_desc) {
+		spdk_thread_send_msg(blk_endpoint->init_thread, _vfu_virtio_blk_bdev_close,
+				     blk_endpoint->bdev_desc);
+		blk_endpoint->bdev_desc = NULL;
+	}
+
+	vfu_virtio_endpoint_destruct(&blk_endpoint->virtio);
+	free(blk_endpoint);
+
+	return 0;
+}
+
+static void *
+vfu_virtio_blk_endpoint_init(struct spdk_vfu_endpoint *endpoint,
+			     char *basename, const char *endpoint_name)
+{
+	struct virtio_blk_endpoint *blk_endpoint;
+	int ret;
+
+	blk_endpoint = calloc(1, sizeof(*blk_endpoint));
+	if (!blk_endpoint) {
+		return NULL;
+	}
+
+	ret = vfu_virtio_endpoint_setup(&blk_endpoint->virtio, endpoint, basename, endpoint_name,
+					&virtio_blk_ops);
+	if (ret) {
+		SPDK_ERRLOG("Failed to set up endpoint %s\n", endpoint_name);
+		free(blk_endpoint);
+		return NULL;
+	}
+
+	return (void *)&blk_endpoint->virtio;
+}
+
+static int
+vfu_virtio_blk_get_device_info(struct spdk_vfu_endpoint *endpoint,
+			       struct spdk_vfu_pci_device *device_info)
+{
+	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
+	struct virtio_blk_endpoint *blk_endpoint = to_blk_endpoint(virtio_endpoint);
+
+	vfu_virtio_get_device_info(&blk_endpoint->virtio, device_info);
+	/* Fill Device ID */
+	device_info->id.did = PCI_DEVICE_ID_VIRTIO_BLK_MODERN;
+
+	return 0;
+}
+
+struct spdk_vfu_endpoint_ops vfu_virtio_blk_ops = {
+	.name = "virtio_blk",
+	.init = vfu_virtio_blk_endpoint_init,
+	.get_device_info = vfu_virtio_blk_get_device_info,
+	.get_vendor_capability = vfu_virtio_get_vendor_capability,
+	.post_memory_add = vfu_virtio_post_memory_add,
+	.pre_memory_remove = vfu_virtio_pre_memory_remove,
+	.reset_device = vfu_virtio_pci_reset_cb,
+	.quiesce_device = vfu_virtio_quiesce_cb,
+	.destruct = vfu_virtio_blk_endpoint_destruct,
+	.attach_device = vfu_virtio_attach_device,
+	.detach_device = vfu_virtio_detach_device,
+};
+
+static void
+__attribute__((constructor)) _vfu_virtio_blk_pci_model_register(void)
+{
+	spdk_vfu_register_endpoint_ops(&vfu_virtio_blk_ops);
+}
+
+SPDK_LOG_REGISTER_COMPONENT(vfu_virtio_blk)
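
Before the header file, it is worth pinning down the descriptor-chain shape that `virtio_blk_process_req()` above relies on. A condensed, editorial model (not a function in this patch; field sizes per `linux/virtio_blk.h`):

~~~c
#include <stdint.h>
#include <sys/uio.h>

/*
 * Every virtio-blk request arrives as at least three iovecs:
 *   iovs[0]        16-byte struct virtio_blk_outhdr (type, ioprio, sector)
 *   iovs[1..n-2]   payload buffers (for IN/OUT, a multiple of 512 bytes)
 *   iovs[n-1]      one status byte the device writes back
 * which is why the code computes:
 *   payload_len = req->payload_size - sizeof(*hdr) - 1;
 *   iovcnt      = req->iovcnt - 2;
 */
static int
validate_blk_framing(const struct iovec *iovs, uint16_t iovcnt, uint32_t payload_size)
{
	if (iovcnt < 2 || payload_size < 16 + 1 ||
	    iovs[0].iov_len != 16 || iovs[iovcnt - 1].iov_len != 1) {
		return -1;	/* malformed chain */
	}
	/* bytes left for data once header and status byte are peeled off */
	return (int)(payload_size - 16 - 1);
}
~~~

Requests such as FLUSH or GET_ID follow the same framing with little or no payload in the middle, which is why a two-element chain (header plus status) is the minimum the data path accepts.
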
diff --git a/module/vfu_device/vfu_virtio_internal.h b/module/vfu_device/vfu_virtio_internal.h
new file mode 100644
index 000000000..dfc9810f5
--- /dev/null
+++ b/module/vfu_device/vfu_virtio_internal.h
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _VFU_VIRTIO_INTERNAL_H
+#define _VFU_VIRTIO_INTERNAL_H
+
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+
+#include "spdk/vfu_target.h"
+
+#define VIRTIO_HOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_F_VERSION_1) | \
+					(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
+					(1ULL << VIRTIO_F_RING_PACKED))
+
+/* virtio device layout:
+ *
+ * region 1: MSI-X Table
+ * region 2: MSI-X PBA
+ * region 4: virtio modern memory 64-bit BAR
+ *     Common configuration          0x0    - 0x1000
+ *     ISR access                    0x1000 - 0x2000
+ *     Device specific configuration 0x2000 - 0x3000
+ *     Notifications                 0x3000 - 0x4000
+ */
+#define VIRTIO_PCI_COMMON_CFG_OFFSET	(0x0)
+#define VIRTIO_PCI_COMMON_CFG_LENGTH	(0x1000)
+#define VIRTIO_PCI_ISR_ACCESS_OFFSET	(VIRTIO_PCI_COMMON_CFG_OFFSET + VIRTIO_PCI_COMMON_CFG_LENGTH)
+#define VIRTIO_PCI_ISR_ACCESS_LENGTH	(0x1000)
+#define VIRTIO_PCI_SPECIFIC_CFG_OFFSET	(VIRTIO_PCI_ISR_ACCESS_OFFSET + VIRTIO_PCI_ISR_ACCESS_LENGTH)
+#define VIRTIO_PCI_SPECIFIC_CFG_LENGTH	(0x1000)
+#define VIRTIO_PCI_NOTIFICATIONS_OFFSET	(VIRTIO_PCI_SPECIFIC_CFG_OFFSET + VIRTIO_PCI_SPECIFIC_CFG_LENGTH)
+#define VIRTIO_PCI_NOTIFICATIONS_LENGTH	(0x1000)
+
+#define VIRTIO_PCI_BAR4_LENGTH	(VIRTIO_PCI_NOTIFICATIONS_OFFSET + VIRTIO_PCI_NOTIFICATIONS_LENGTH)
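+
+/*
+ * Editorial sketch, not part of the original commit: the four BAR4 windows
+ * above tile contiguously, 4 KiB each, so the offsets expand to 0x0, 0x1000,
+ * 0x2000, 0x3000 and the BAR length to 0x4000. If the layout is ever
+ * reshuffled, compile-time checks along these lines (SPDK_STATIC_ASSERT is
+ * declared in "spdk/assert.h") would catch a gap or overlap:
+ *
+ *   SPDK_STATIC_ASSERT(VIRTIO_PCI_ISR_ACCESS_OFFSET == 0x1000, "BAR4 gap");
+ *   SPDK_STATIC_ASSERT(VIRTIO_PCI_NOTIFICATIONS_OFFSET == 0x3000, "BAR4 gap");
+ *   SPDK_STATIC_ASSERT(VIRTIO_PCI_BAR4_LENGTH == 0x4000, "BAR4 length");
+ */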
+
+#define VIRTIO_DEV_MAX_IOVS	(129)
+/* Maximum number of requests which can be processed at one time */
+#define VIRTIO_DEV_VRING_MAX_REQS	(32)
+/* Maximum number of queues that can be supported by the virtio device */
+#define VIRTIO_DEV_MAX_VQS	(64)
+/* Default queue size */
+#define VIRTIO_VQ_DEFAULT_SIZE	(128)
+/* Maximum queue size */
+#define VIRTIO_VQ_MAX_SIZE	(1024)
+
+struct vfu_virtio_endpoint;
+struct vfu_virtio_req;
+
+struct virtio_pci_cfg {
+	/* Common PCI configuration */
+	uint32_t guest_feat_lo;
+	uint32_t guest_feat_hi;
+
+	/* Negotiated feature bits */
+	uint64_t guest_features;
+
+	uint32_t host_feature_select;
+	uint32_t guest_feature_select;
+
+	uint16_t msix_config;
+	uint8_t device_status;
+	uint8_t config_generation;
+	uint16_t queue_select;
+
+	/* ISR access */
+	uint8_t isr;
+};
+
+enum vfu_vq_state {
+	VFU_VQ_CREATED = 0,
+	VFU_VQ_ACTIVE,
+	VFU_VQ_INACTIVE,
+};
+
+struct q_mapping {
+	/* iov of local process mapping. */
+	struct iovec iov;
+	/* Stored sg, needed for unmap. */
+	dma_sg_t *sg;
+	/* physical address */
+	uint64_t phys_addr;
+	/* virtual address */
+	union {
+		void *addr;
+
+		struct vring_desc *desc;
+		struct vring_packed_desc *desc_packed;
+
+		struct vring_avail *avail;
+		struct vring_packed_desc_event *driver_event;
+
+		struct vring_used *used;
+		struct vring_packed_desc_event *device_event;
+	};
+	/* size in bytes */
+	uint64_t len;
+};
+
+struct vfu_virtio_vq {
+	/* Read Only */
+	uint16_t id;
+	uint16_t qsize;
+
+	bool enabled;
+	uint16_t vector;
+
+	enum vfu_vq_state q_state;
+	STAILQ_HEAD(, vfu_virtio_req) free_reqs;
+
+	uint32_t desc_lo;
+	uint32_t desc_hi;
+	uint32_t avail_lo;
+	uint32_t avail_hi;
+	uint32_t used_lo;
+	uint32_t used_hi;
+
+	struct q_mapping avail;
+	struct q_mapping used;
+	struct q_mapping desc;
+
+	uint16_t last_avail_idx;
+	uint16_t last_used_idx;
+
+	struct {
+		/* To mark a descriptor as available in packed ring
+		 * Equal to avail_wrap_counter in spec.
+		 */
+		uint8_t avail_phase	: 1;
+		/* To mark a descriptor as used in packed ring
+		 * Equal to used_wrap_counter in spec.
+		 */
+		uint8_t used_phase	: 1;
+		uint8_t padding		: 5;
+		bool packed_ring	: 1;
+	} packed;
+
+	/* Request count from last event */
+	uint16_t used_req_cnt;
+	/* Next time when we need to send event */
+	uint64_t next_event_time;
+};
+
+struct vfu_virtio_dev {
+	char name[SPDK_VFU_MAX_NAME_LEN];
+	/* RO for Guest Driver */
+	uint16_t num_queues;
+	/* Supported feature bits by host driver, RO for Guest Driver */
+	uint64_t host_features;
+
+	struct virtio_pci_cfg cfg;
+	struct vfu_virtio_vq vqs[VIRTIO_DEV_MAX_VQS];
+
+	struct vfu_virtio_endpoint *virtio_endpoint;
+
+	/* VIRTIO_DEV_MAX_VQS * 3 worth of dma_sg_size() */
+	uint8_t sg[];
+};
+
+struct vfu_virtio_ops {
+	uint64_t (*get_device_features)(struct vfu_virtio_endpoint *virtio_endpoint);
+	struct vfu_virtio_req *(*alloc_req)(struct vfu_virtio_endpoint *virtio_endpoint,
+					    struct vfu_virtio_vq *vq);
+	void (*free_req)(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq,
+			 struct vfu_virtio_req *req);
+	int (*exec_request)(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq,
+			    struct vfu_virtio_req *req);
+	int (*get_config)(struct vfu_virtio_endpoint *virtio_endpoint, char *buf, uint64_t offset,
+			  uint64_t count);
+	int (*set_config)(struct vfu_virtio_endpoint *virtio_endpoint, char *buf, uint64_t offset,
+			  uint64_t count);
+	int (*start_device)(struct vfu_virtio_endpoint *virtio_endpoint);
+	int (*stop_device)(struct vfu_virtio_endpoint *virtio_endpoint);
+};
+
+struct vfu_virtio_endpoint {
+	struct vfu_virtio_dev *dev;
+	int devmem_fd;
+	volatile uint32_t *doorbells;
+
+	uint16_t num_queues;
+	uint16_t qsize;
+	bool packed_ring;
+
+	uint32_t coalescing_delay_us;
+
+	struct spdk_vfu_endpoint *endpoint;
+	struct spdk_thread *thread;
+
+	struct vfu_virtio_ops virtio_ops;
+
+	/* quiesce poller */
+	uint32_t io_outstanding;
+	bool quiesce_in_progress;
+	struct spdk_poller *quiesce_poller;
+};
+
+struct vfu_virtio_req {
+	struct vfu_virtio_dev *dev;
+	struct vfu_virtio_vq *vq;
+
+	STAILQ_ENTRY(vfu_virtio_req) link;
+
+	uint32_t payload_size;
+	uint32_t used_len;
+
+	/* split vring */
+	uint16_t req_idx;
+	/* packed vring */
+	uint16_t buffer_id;
+	uint16_t num_descs;
+
+	uint16_t iovcnt;
+	struct iovec iovs[VIRTIO_DEV_MAX_IOVS + 1];
+	uint8_t desc_writeable[VIRTIO_DEV_MAX_IOVS + 1];
+
+	struct iovec *indirect_iov;
+	dma_sg_t *indirect_sg;
+
+	/* VIRTIO_DEV_MAX_IOVS + 1 worth of dma_sg_size() */
+	uint8_t sg[];
+};
+
+static inline bool
+virtio_guest_has_feature(struct vfu_virtio_dev *dev, uint32_t feature_bit)
+{
+	assert(feature_bit < 64);
+
+	return !!(dev->cfg.guest_features & (1ULL << feature_bit));
+}
+
+static inline uint64_t
+virtio_queue_desc_size(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
+{
+	return sizeof(struct vring_desc) * vq->qsize;
+}
+
+static inline uint64_t
+virtio_queue_avail_size(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
+{
+	uint16_t event_size;
+
+	if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) {
+		return sizeof(struct vring_packed_desc_event);
+	}
+
+	event_size = virtio_guest_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	return (sizeof(struct vring_avail) + sizeof(uint16_t) * vq->qsize +
+		event_size);
+}
+
+static inline uint64_t
+virtio_queue_used_size(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
+{
+	uint16_t event_size;
+
+	if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) {
+		return sizeof(struct vring_packed_desc_event);
+	}
+
+	event_size = virtio_guest_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ?
2 : 0; + return (sizeof(struct vring_used) + sizeof(struct vring_used_elem) * vq->qsize + + event_size); +} + +static inline bool +virtio_queue_event_is_suppressed(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq) +{ + bool is_suppressed = false; + + if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) { + is_suppressed = vq->avail.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE; + } else { + is_suppressed = vq->avail.avail->flags & VRING_AVAIL_F_NO_INTERRUPT; + + } + + return is_suppressed; +} + +static inline bool +virtio_dev_is_started(struct vfu_virtio_dev *dev) +{ + return !!(dev->cfg.device_status & VIRTIO_CONFIG_S_DRIVER_OK); +} + +static inline bool +virtio_vring_split_desc_is_indirect(struct vring_desc *desc) +{ + return !!(desc->flags & VRING_DESC_F_INDIRECT); +} + +static inline bool +virtio_vring_packed_desc_is_indirect(struct vring_packed_desc *desc) +{ + return !!(desc->flags & VRING_DESC_F_INDIRECT); +} + +static inline bool +virtio_vring_split_desc_is_wr(struct vring_desc *desc) +{ + return !!(desc->flags & VRING_DESC_F_WRITE); +} + +static inline bool +virtio_vring_packed_desc_is_wr(struct vring_packed_desc *desc) +{ + return !!(desc->flags & VRING_DESC_F_WRITE); +} + +static inline bool +virtio_vring_packed_is_avail(struct vring_packed_desc *desc, bool avail_phase) +{ + bool avail_flag, used_flag; + uint16_t flags = desc->flags; + + avail_flag = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); + used_flag = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); + + /* To mark a desc as available, the driver sets the F_AVAIL bit in flags + * to match the internal avail wrap counter. It also sets the F_USED bit to + * match the inverse value but it's not mandatory. + */ + return (avail_flag != used_flag) && (avail_flag == avail_phase); +} + +static inline bool +virtio_vring_packed_is_used(struct vring_packed_desc *desc, bool used_phase) +{ + bool avail_flag, used_flag; + uint16_t flags = desc->flags; + + avail_flag = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); + used_flag = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); + + /* When the descriptor is used, two flags in descriptor + * avail flag and used flag are set to equal + * and used flag value == used_wrap_counter. 
+ */ + return (used_flag == avail_flag) && (used_flag == used_phase); +} + +static inline bool +virtio_req_iov_is_wr(struct vfu_virtio_req *req, uint32_t iov_num) +{ + assert(iov_num <= VIRTIO_DEV_MAX_IOVS); + return req->desc_writeable[iov_num]; +} + +static inline struct vfu_virtio_req * +vfu_virtio_vq_alloc_req(struct vfu_virtio_endpoint *endpoint, struct vfu_virtio_vq *vq) +{ + assert(endpoint->virtio_ops.alloc_req != NULL); + return endpoint->virtio_ops.alloc_req(endpoint, vq); +} + +static inline void +vfu_virtio_vq_free_req(struct vfu_virtio_endpoint *endpoint, struct vfu_virtio_vq *vq, + struct vfu_virtio_req *req) +{ + assert(endpoint->virtio_ops.free_req); + endpoint->virtio_ops.free_req(endpoint, vq, req); +} + +void virtio_vq_used_ring_split_enqueue(struct vfu_virtio_vq *vq, uint16_t req_idx, + uint32_t used_len); +void virtio_vq_used_ring_packed_enqueue(struct vfu_virtio_vq *vq, uint16_t buffer_id, + uint32_t num_descs, uint32_t used_len); +struct vfu_virtio_req *virito_dev_packed_ring_get_next_avail_req(struct vfu_virtio_dev *dev, + struct vfu_virtio_vq *vq); +struct vfu_virtio_req *virito_dev_split_ring_get_next_avail_req(struct vfu_virtio_dev *dev, + struct vfu_virtio_vq *vq); + +int vfu_virtio_quiesce_cb(struct spdk_vfu_endpoint *endpoint); + +void vfu_virtio_dev_put_req(struct vfu_virtio_req *req); +void vfu_virtio_finish_req(struct vfu_virtio_req *req); +void vfu_virtio_vq_flush_irq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq); +int vfu_virito_dev_process_packed_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq); +int vfu_virito_dev_process_split_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq); +void vfu_virtio_notify_config(struct vfu_virtio_endpoint *virtio_endpoint); +int vfu_virtio_endpoint_setup(struct vfu_virtio_endpoint *virtio_endpoint, + struct spdk_vfu_endpoint *endpoint, + char *basename, const char *endpoint_name, + struct vfu_virtio_ops *ops); +int vfu_virtio_endpoint_destruct(struct vfu_virtio_endpoint *virtio_endpoint); +void vfu_virtio_get_device_info(struct vfu_virtio_endpoint *virtio_endpoint, + struct spdk_vfu_pci_device *device_info); +int vfu_virtio_attach_device(struct spdk_vfu_endpoint *endpoint); +int vfu_virtio_detach_device(struct spdk_vfu_endpoint *endpoint); +uint16_t vfu_virtio_get_vendor_capability(struct spdk_vfu_endpoint *endpoint, char *buf, + uint16_t buf_len, uint16_t idx); +int vfu_virtio_post_memory_add(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end); +int vfu_virtio_pre_memory_remove(struct spdk_vfu_endpoint *endpoint, void *map_start, + void *map_end); +int vfu_virtio_pci_reset_cb(struct spdk_vfu_endpoint *endpoint); +int vfu_virtio_blk_add_bdev(const char *name, const char *bdev_name, + uint16_t num_queues, uint16_t qsize, bool packed_ring); + +#endif diff --git a/module/vfu_device/vfu_virtio_rpc.c b/module/vfu_device/vfu_virtio_rpc.c new file mode 100644 index 000000000..79702e2aa --- /dev/null +++ b/module/vfu_device/vfu_virtio_rpc.c @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ */ + +#include "spdk/bdev.h" +#include "spdk/log.h" +#include "spdk/rpc.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/thread.h" + +#include "vfu_virtio_internal.h" + +struct rpc_delete_vfu_endpoint { + char *name; +}; + +static const struct spdk_json_object_decoder rpc_delete_vfu_endpoint_decode[] = { + {"name", offsetof(struct rpc_delete_vfu_endpoint, name), spdk_json_decode_string } +}; + +static void +free_rpc_delete_vfu_endpoint(struct rpc_delete_vfu_endpoint *req) +{ + free(req->name); +} + +static void +rpc_vfu_virtio_delete_endpoint(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_vfu_endpoint req = {0}; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_vfu_endpoint_decode, + SPDK_COUNTOF(rpc_delete_vfu_endpoint_decode), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vfu_delete_endpoint(req.name); + if (rc < 0) { + goto invalid; + } + free_rpc_delete_vfu_endpoint(&req); + + spdk_jsonrpc_send_bool_response(request, true); + return; + +invalid: + free_rpc_delete_vfu_endpoint(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("vfu_virtio_delete_endpoint", rpc_vfu_virtio_delete_endpoint, + SPDK_RPC_RUNTIME) + +struct rpc_vfu_virtio_create_blk { + char *name; + char *bdev_name; + char *cpumask; + uint16_t num_queues; + uint16_t qsize; + bool packed_ring; +}; + +static const struct spdk_json_object_decoder rpc_construct_vfu_virtio_create_blk[] = { + {"name", offsetof(struct rpc_vfu_virtio_create_blk, name), spdk_json_decode_string }, + {"bdev_name", offsetof(struct rpc_vfu_virtio_create_blk, bdev_name), spdk_json_decode_string }, + {"cpumask", offsetof(struct rpc_vfu_virtio_create_blk, cpumask), spdk_json_decode_string, true}, + {"num_queues", offsetof(struct rpc_vfu_virtio_create_blk, num_queues), spdk_json_decode_uint16, true }, + {"qsize", offsetof(struct rpc_vfu_virtio_create_blk, qsize), spdk_json_decode_uint16, true }, + {"packed_ring", offsetof(struct rpc_vfu_virtio_create_blk, packed_ring), spdk_json_decode_bool, true}, +}; + +static void +free_rpc_vfu_virtio_create_blk(struct rpc_vfu_virtio_create_blk *req) +{ + free(req->name); + free(req->bdev_name); + free(req->cpumask); +} + +static void +rpc_vfu_virtio_create_blk_endpoint(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_vfu_virtio_create_blk req = {0}; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_vfu_virtio_create_blk, + SPDK_COUNTOF(rpc_construct_vfu_virtio_create_blk), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vfu_create_endpoint(req.name, req.cpumask, "virtio_blk"); + if (rc) { + SPDK_ERRLOG("Failed to create virtio_blk endpoint\n"); + goto invalid; + } + + rc = vfu_virtio_blk_add_bdev(req.name, req.bdev_name, req.num_queues, req.qsize, + req.packed_ring); + if (rc < 0) { + spdk_vfu_delete_endpoint(req.name); + goto invalid; + } + free_rpc_vfu_virtio_create_blk(&req); + + spdk_jsonrpc_send_bool_response(request, true); + return; + +invalid: + free_rpc_vfu_virtio_create_blk(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("vfu_virtio_create_blk_endpoint", rpc_vfu_virtio_create_blk_endpoint, + SPDK_RPC_RUNTIME) diff --git a/python/spdk/rpc/vfio_user.py 
b/python/spdk/rpc/vfio_user.py
index 638fd5237..6c2740979 100644
--- a/python/spdk/rpc/vfio_user.py
+++ b/python/spdk/rpc/vfio_user.py
@@ -9,3 +9,43 @@ def vfu_tgt_set_base_path(client, path):
     }
 
     return client.call('vfu_tgt_set_base_path', params)
+
+
+def vfu_virtio_delete_endpoint(client, name):
+    """Delete specified endpoint name.
+
+    Args:
+        name: endpoint name
+    """
+    params = {
+        'name': name
+    }
+
+    return client.call('vfu_virtio_delete_endpoint', params)
+
+
+def vfu_virtio_create_blk_endpoint(client, name, bdev_name, cpumask, num_queues, qsize, packed_ring):
+    """Create virtio-blk endpoint.
+
+    Args:
+        name: endpoint name
+        bdev_name: name of block device
+        cpumask: CPU core mask
+        num_queues: number of vrings
+        qsize: number of elements in each vring
+        packed_ring: enable packed ring
+    """
+    params = {
+        'name': name,
+        'bdev_name': bdev_name
+    }
+    if cpumask:
+        params['cpumask'] = cpumask
+    if num_queues:
+        params['num_queues'] = num_queues
+    if qsize:
+        params['qsize'] = qsize
+    if packed_ring:
+        params['packed_ring'] = packed_ring
+
+    return client.call('vfu_virtio_create_blk_endpoint', params)
diff --git a/scripts/rpc.py b/scripts/rpc.py
index 38ad6891a..37a19dfe5 100755
--- a/scripts/rpc.py
+++ b/scripts/rpc.py
@@ -2624,6 +2624,31 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
     p.add_argument('path', help='socket base path')
     p.set_defaults(func=vfu_tgt_set_base_path)
 
+    def vfu_virtio_delete_endpoint(args):
+        rpc.vfio_user.vfu_virtio_delete_endpoint(args.client, name=args.name)
+
+    p = subparsers.add_parser('vfu_virtio_delete_endpoint', help='Delete the PCI device via endpoint name.')
+    p.add_argument('name', help='Endpoint name')
+    p.set_defaults(func=vfu_virtio_delete_endpoint)
+
+    def vfu_virtio_create_blk_endpoint(args):
+        rpc.vfio_user.vfu_virtio_create_blk_endpoint(args.client,
+                                                     name=args.name,
+                                                     bdev_name=args.bdev_name,
+                                                     cpumask=args.cpumask,
+                                                     num_queues=args.num_queues,
+                                                     qsize=args.qsize,
+                                                     packed_ring=args.packed_ring)
+
+    p = subparsers.add_parser('vfu_virtio_create_blk_endpoint', help='Create virtio-blk endpoint.')
+    p.add_argument('name', help='Name of the endpoint')
+    p.add_argument('--bdev-name', help='block device name', type=str, required=True)
+    p.add_argument('--cpumask', help='CPU masks')
+    p.add_argument('--num-queues', help='number of vrings', type=int, default=0)
+    p.add_argument('--qsize', help='number of elements for each vring', type=int, default=0)
+    p.add_argument("--packed-ring", action='store_true', help='Enable packed ring')
+    p.set_defaults(func=vfu_virtio_create_blk_endpoint)
+
     # accel_fw
     def accel_get_opc_assignments(args):
         print_dict(rpc.accel.accel_get_opc_assignments(args.client))
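
To close, a worked example of the packed-ring phase test that `virtio_vring_packed_is_avail()` in `vfu_virtio_internal.h` performs. This is an editorial sketch mirroring that helper; bit positions 7 and 15 are `VRING_PACKED_DESC_F_AVAIL` and `VRING_PACKED_DESC_F_USED` from `linux/virtio_ring.h`:

~~~c
#include <stdbool.h>
#include <stdint.h>

/* On the driver's first lap around the ring, avail_wrap_counter (avail_phase)
 * is 1 and a freshly posted descriptor has F_AVAIL=1, F_USED=0. After the
 * counter wraps, the phase flips and "available" means F_AVAIL=0, F_USED=1. */
static bool
packed_desc_is_avail(uint16_t flags, bool avail_phase)
{
	bool avail = !!(flags & (1u << 7));	/* VRING_PACKED_DESC_F_AVAIL */
	bool used = !!(flags & (1u << 15));	/* VRING_PACKED_DESC_F_USED */

	return (avail != used) && (avail == avail_phase);
}

/* packed_desc_is_avail(1u << 7, true)  -> true  (first lap, fresh descriptor)
 * packed_desc_is_avail(1u << 7, false) -> false (leftover from the prior lap) */
~~~

Tracking the wrap counters in the `packed` bitfield of `struct vfu_virtio_vq` is what lets the device reuse ring slots in place, which is the main reason packed rings avoid the separate avail/used arrays of the split layout.
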