spdk/module/vfu_device/vfu_virtio.c

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (C) 2022 Intel Corporation.
* All rights reserved.
*/
/*
* virtio over vfio-user common library
*/
#include "spdk/env.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/stdinc.h"
#include "spdk/assert.h"
#include "spdk/barrier.h"
#include "spdk/thread.h"
#include "spdk/memory.h"
#include "spdk/util.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "vfu_virtio_internal.h"
static int vfu_virtio_dev_start(struct vfu_virtio_dev *dev);
static int vfu_virtio_dev_stop(struct vfu_virtio_dev *dev);
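
/*
 * Helpers to map and unmap one virtqueue ring (descriptor, available or
 * used) between guest physical memory and the local process. A q_mapping
 * caches the translated address, the length and the vfio-user
 * scatter-gather entry so the region can be released again on device
 * reset or memory-region removal.
 */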
static inline void
vfu_virtio_unmap_q(struct vfu_virtio_dev *dev, struct q_mapping *mapping)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
if (mapping->addr != NULL) {
spdk_vfu_unmap_sg(virtio_endpoint->endpoint, mapping->sg,
&mapping->iov, 1);
mapping->addr = NULL;
mapping->len = 0;
}
}
static inline int
vfu_virtio_map_q(struct vfu_virtio_dev *dev, struct q_mapping *mapping, uint64_t phys_addr,
uint64_t len)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
void *addr;
if (!mapping->addr && len && phys_addr) {
addr = spdk_vfu_map_one(virtio_endpoint->endpoint, phys_addr, len,
mapping->sg, &mapping->iov, PROT_READ | PROT_WRITE);
if (addr == NULL) {
return -EINVAL;
}
mapping->phys_addr = phys_addr;
mapping->len = len;
mapping->addr = addr;
}
return 0;
}
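
/*
 * Map all three rings of a virtqueue. The driver programs each ring
 * address into the common config space as two 32-bit halves (see the
 * Q_DESCLO/Q_DESCHI handling below), so the 64-bit guest physical
 * address is reassembled from its hi/lo parts before translation.
 */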
static int
virtio_dev_map_vq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
int ret;
uint64_t phys_addr, len;
if (!vq->enabled || (vq->q_state == VFU_VQ_ACTIVE)) {
return 0;
}
SPDK_DEBUGLOG(vfu_virtio, "%s: try to map vq %u\n", dev->name, vq->id);
len = virtio_queue_desc_size(dev, vq);
phys_addr = ((((uint64_t)vq->desc_hi) << 32) | vq->desc_lo);
ret = vfu_virtio_map_q(dev, &vq->desc, phys_addr, len);
if (ret) {
SPDK_DEBUGLOG(vfu_virtio, "Error to map descs\n");
return ret;
}
len = virtio_queue_avail_size(dev, vq);
phys_addr = ((((uint64_t)vq->avail_hi) << 32) | vq->avail_lo);
ret = vfu_virtio_map_q(dev, &vq->avail, phys_addr, len);
if (ret) {
vfu_virtio_unmap_q(dev, &vq->desc);
SPDK_DEBUGLOG(vfu_virtio, "Error to map available ring\n");
return ret;
}
len = virtio_queue_used_size(dev, vq);
phys_addr = ((((uint64_t)vq->used_hi) << 32) | vq->used_lo);
ret = vfu_virtio_map_q(dev, &vq->used, phys_addr, len);
if (ret) {
vfu_virtio_unmap_q(dev, &vq->desc);
vfu_virtio_unmap_q(dev, &vq->avail);
SPDK_DEBUGLOG(vfu_virtio, "Error to map used ring\n");
return ret;
}
/* We're running in polling mode, so tell the driver it doesn't need to notify us */
if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) {
vq->used.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
} else {
vq->used.used->flags = VRING_USED_F_NO_NOTIFY;
}
SPDK_DEBUGLOG(vfu_virtio, "%s: map vq %u successfully\n", dev->name, vq->id);
vq->q_state = VFU_VQ_ACTIVE;
return 0;
}
static void
virtio_dev_unmap_vq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
SPDK_DEBUGLOG(vfu_virtio, "%s: unmap vq %u\n", dev->name, vq->id);
vq->q_state = VFU_VQ_INACTIVE;
vfu_virtio_unmap_q(dev, &vq->desc);
vfu_virtio_unmap_q(dev, &vq->avail);
vfu_virtio_unmap_q(dev, &vq->used);
}
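
/*
 * A vq must be unmapped if any of its rings falls inside the memory
 * region [map_start, map_end) that is about to be removed.
 */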
static bool
vfu_virtio_vq_should_unmap(struct vfu_virtio_vq *vq, void *map_start, void *map_end)
{
/* always unmap when stopping the device */
if (!map_start || !map_end) {
return true;
}
if (vq->desc.addr >= map_start && vq->desc.addr < map_end) {
return true;
}
if (vq->avail.addr >= map_start && vq->avail.addr < map_end) {
return true;
}
if (vq->used.addr >= map_start && vq->used.addr < map_end) {
return true;
}
return false;
}
static void
vfu_virtio_dev_unmap_vqs(struct vfu_virtio_dev *dev, void *map_start, void *map_end)
{
uint32_t i;
struct vfu_virtio_vq *vq;
for (i = 0; i < dev->num_queues; i++) {
vq = &dev->vqs[i];
if (!vq->enabled) {
continue;
}
if (!vfu_virtio_vq_should_unmap(vq, map_start, map_end)) {
continue;
}
virtio_dev_unmap_vq(dev, vq);
}
}
/* This function is used to notify VM that the device
* configuration space has been changed.
*/
void
vfu_virtio_notify_config(struct vfu_virtio_endpoint *virtio_endpoint)
{
struct spdk_vfu_endpoint *endpoint = virtio_endpoint->endpoint;
if (virtio_endpoint->dev == NULL) {
return;
}
virtio_endpoint->dev->cfg.isr = 1;
virtio_endpoint->dev->cfg.config_generation++;
vfu_irq_trigger(spdk_vfu_get_vfu_ctx(endpoint), virtio_endpoint->dev->cfg.msix_config);
}
static void
vfu_virtio_dev_reset(struct vfu_virtio_dev *dev)
{
uint32_t i;
struct vfu_virtio_vq *vq;
SPDK_DEBUGLOG(vfu_virtio, "device %s resetting\n", dev->name);
for (i = 0; i < dev->num_queues; i++) {
vq = &dev->vqs[i];
vq->q_state = VFU_VQ_CREATED;
vq->vector = 0;
vq->enabled = false;
vq->last_avail_idx = 0;
vq->last_used_idx = 0;
vq->packed.packed_ring = false;
vq->packed.avail_phase = 0;
vq->packed.used_phase = 0;
}
memset(&dev->cfg, 0, sizeof(struct virtio_pci_cfg));
}
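
/*
 * Handle a write to the device status register. Per the virtio spec,
 * the device is started once the driver sets DRIVER_OK and stopped when
 * that bit is cleared again; writing 0 requests a full device reset.
 */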
static int
virtio_dev_set_status(struct vfu_virtio_dev *dev, uint8_t status)
{
int ret = 0;
SPDK_DEBUGLOG(vfu_virtio, "device current status %x, set status %x\n", dev->cfg.device_status,
status);
if (!(virtio_dev_is_started(dev))) {
if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
ret = vfu_virtio_dev_start(dev);
}
} else {
if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
ret = vfu_virtio_dev_stop(dev);
}
}
if (ret) {
SPDK_ERRLOG("Failed to start/stop device\n");
return ret;
}
dev->cfg.device_status = status;
if (status == 0) {
vfu_virtio_dev_reset(dev);
}
return 0;
}
static int
virtio_dev_set_features(struct vfu_virtio_dev *dev, uint64_t features)
{
if (dev->cfg.device_status & VIRTIO_CONFIG_S_FEATURES_OK) {
SPDK_ERRLOG("Feature negotiation has finished\n");
return -EINVAL;
}
if (features & ~dev->host_features) {
SPDK_ERRLOG("Host features 0x%"PRIx64", guest features 0x%"PRIx64"\n",
dev->host_features, features);
return -ENOTSUP;
}
SPDK_DEBUGLOG(vfu_virtio, "%s: negotiated features 0x%"PRIx64"\n", dev->name,
features);
dev->cfg.guest_features = features;
return 0;
}
static int
virtio_dev_enable_vq(struct vfu_virtio_dev *dev, uint16_t qid)
{
struct vfu_virtio_vq *vq;
SPDK_DEBUGLOG(vfu_virtio, "%s: enable vq %u\n", dev->name, qid);
vq = &dev->vqs[qid];
if (vq->enabled) {
SPDK_ERRLOG("Queue %u is enabled\n", qid);
return -EINVAL;
}
vq->enabled = true;
if (virtio_dev_map_vq(dev, vq)) {
SPDK_ERRLOG("Queue %u failed to map\n", qid);
return 0;
}
vq->avail.avail->idx = 0;
vq->last_avail_idx = 0;
vq->used.used->idx = 0;
vq->last_used_idx = 0;
if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) {
SPDK_DEBUGLOG(vfu_virtio, "%s: vq %u PACKED RING ENABLED\n", dev->name, qid);
vq->packed.packed_ring = true;
vq->packed.avail_phase = true;
vq->packed.used_phase = true;
}
return 0;
}
static int
virtio_dev_disable_vq(struct vfu_virtio_dev *dev, uint16_t qid)
{
struct vfu_virtio_vq *vq;
SPDK_DEBUGLOG(vfu_virtio, "%s: disable vq %u\n", dev->name, qid);
vq = &dev->vqs[qid];
if (!vq->enabled) {
SPDK_NOTICELOG("Queue %u isn't enabled\n", qid);
return 0;
}
virtio_dev_unmap_vq(dev, vq);
vq->q_state = VFU_VQ_CREATED;
vq->vector = 0;
vq->enabled = false;
vq->last_avail_idx = 0;
vq->last_used_idx = 0;
vq->packed.packed_ring = false;
vq->packed.avail_phase = 0;
vq->packed.used_phase = 0;
return 0;
}
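
/*
 * Harvest new entries from the split-ring available ring: read the
 * driver-owned avail->idx, then consume up to max_reqs entries between
 * our shadow last_avail_idx and the new index. The read barrier ensures
 * the ring entries are not read before the index.
 */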
static int
virtio_dev_split_get_avail_reqs(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq,
uint16_t *reqs, uint16_t max_reqs)
{
uint16_t count, i, avail_idx, last_idx;
last_idx = vq->last_avail_idx;
avail_idx = vq->avail.avail->idx;
spdk_smp_rmb();
count = avail_idx - last_idx;
if (count == 0) {
return 0;
}
count = spdk_min(count, max_reqs);
vq->last_avail_idx += count;
for (i = 0; i < count; i++) {
reqs[i] = vq->avail.avail->ring[(last_idx + i) & (vq->qsize - 1)];
}
SPDK_DEBUGLOG(vfu_virtio,
"AVAIL: vq %u last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
vq->id, last_idx, avail_idx, count);
return count;
}
static int
virtio_vring_split_desc_get_next(struct vring_desc **desc,
struct vring_desc *desc_table,
uint32_t desc_table_size)
{
struct vring_desc *old_desc = *desc;
uint16_t next_idx;
if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
*desc = NULL;
return 0;
}
next_idx = old_desc->next;
if (spdk_unlikely(next_idx >= desc_table_size)) {
*desc = NULL;
return -1;
}
*desc = &desc_table[next_idx];
return 0;
}
static inline void *
virtio_vring_desc_to_iov(struct vfu_virtio_dev *dev, struct vring_desc *desc,
dma_sg_t *sg, struct iovec *iov)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
return spdk_vfu_map_one(virtio_endpoint->endpoint, desc->addr, desc->len,
sg, iov, PROT_READ | PROT_WRITE);
}
static int
virtio_split_vring_get_desc(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq, uint16_t desc_idx,
struct vring_desc **desc, struct vring_desc **desc_table,
uint32_t *desc_table_size,
dma_sg_t *sg, struct iovec *iov)
{
*desc = &vq->desc.desc[desc_idx];
if (virtio_vring_split_desc_is_indirect(*desc)) {
*desc_table_size = (*desc)->len / sizeof(struct vring_desc);
*desc_table = virtio_vring_desc_to_iov(dev, *desc, sg, iov);
*desc = *desc_table;
if (*desc == NULL) {
return -EINVAL;
}
return 0;
}
*desc_table = vq->desc.desc;
*desc_table_size = vq->qsize;
return 0;
}
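
/*
 * Each request carries a flat array of scatter-gather entries at
 * req->sg, one per IOV; dma_sg_size() is used for the stride because
 * dma_sg_t is opaque to libvfio-user consumers.
 */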
static inline dma_sg_t *
virtio_req_to_sg_t(struct vfu_virtio_req *req, uint32_t iovcnt)
{
return (dma_sg_t *)(req->sg + iovcnt * dma_sg_size());
}
static inline struct vfu_virtio_req *
vfu_virtio_dev_get_req(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq)
{
struct vfu_virtio_req *req;
req = STAILQ_FIRST(&vq->free_reqs);
if (req == NULL) {
return NULL;
}
STAILQ_REMOVE_HEAD(&vq->free_reqs, link);
req->iovcnt = 0;
req->used_len = 0;
req->payload_size = 0;
req->req_idx = 0;
req->buffer_id = 0;
req->num_descs = 0;
return req;
}
void
vfu_virtio_dev_put_req(struct vfu_virtio_req *req)
{
struct vfu_virtio_dev *dev = req->dev;
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
vfu_ctx_t *vfu_ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint);
if (req->indirect_iov->iov_base) {
vfu_sgl_put(vfu_ctx, req->indirect_sg, req->indirect_iov, 1);
req->indirect_iov->iov_base = NULL;
req->indirect_iov->iov_len = 0;
}
if (req->iovcnt) {
vfu_sgl_put(vfu_ctx, virtio_req_to_sg_t(req, 0), req->iovs, req->iovcnt);
req->iovcnt = 0;
}
STAILQ_INSERT_HEAD(&req->vq->free_reqs, req, link);
}
void
vfu_virtio_finish_req(struct vfu_virtio_req *req)
{
struct vfu_virtio_dev *dev = req->dev;
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
assert(virtio_endpoint->io_outstanding);
virtio_endpoint->io_outstanding--;
if (!virtio_guest_has_feature(req->dev, VIRTIO_F_RING_PACKED)) {
virtio_vq_used_ring_split_enqueue(req->vq, req->req_idx, req->used_len);
} else {
virtio_vq_used_ring_packed_enqueue(req->vq, req->buffer_id, req->num_descs, req->used_len);
}
vfu_virtio_dev_put_req(req);
}
static inline void
vfu_virtio_dev_free_reqs(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_dev *dev)
{
struct vfu_virtio_req *req;
struct vfu_virtio_vq *vq;
uint32_t i;
for (i = 0; i < dev->num_queues; i++) {
vq = &dev->vqs[i];
while (!STAILQ_EMPTY(&vq->free_reqs)) {
req = STAILQ_FIRST(&vq->free_reqs);
STAILQ_REMOVE_HEAD(&vq->free_reqs, link);
vfu_virtio_vq_free_req(virtio_endpoint, vq, req);
}
}
}
static int
virtio_dev_split_iovs_setup(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq,
uint16_t desc_idx, struct vfu_virtio_req *req)
{
struct vring_desc *desc, *desc_table;
uint32_t desc_table_size, len = 0;
uint32_t desc_handled_cnt = 0;
int rc;
rc = virtio_split_vring_get_desc(dev, vq, desc_idx, &desc,
&desc_table, &desc_table_size,
req->indirect_sg, req->indirect_iov);
if (spdk_unlikely(rc)) {
SPDK_ERRLOG("Invalid descriptor at index %"PRIu16".\n", desc_idx);
return rc;
}
assert(req->iovcnt == 0);
while (true) {
if (spdk_unlikely(!virtio_vring_desc_to_iov(dev, desc, virtio_req_to_sg_t(req, req->iovcnt),
&req->iovs[req->iovcnt]))) {
return -EINVAL;
}
req->desc_writeable[req->iovcnt] = false;
if (virtio_vring_split_desc_is_wr(desc)) {
req->desc_writeable[req->iovcnt] = true;
}
req->iovcnt++;
len += desc->len;
rc = virtio_vring_split_desc_get_next(&desc, desc_table, desc_table_size);
if (spdk_unlikely(rc)) {
return rc;
} else if (desc == NULL) {
break;
}
desc_handled_cnt++;
if (spdk_unlikely(desc_handled_cnt > desc_table_size)) {
return -EINVAL;
}
}
req->payload_size = len;
return 0;
}
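
/*
 * Complete a request on a split ring: fill the next used ring entry,
 * then publish the new used index. The write barrier guarantees the
 * entry is visible to the driver before the index update.
 */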
void
virtio_vq_used_ring_split_enqueue(struct vfu_virtio_vq *vq, uint16_t req_idx, uint32_t used_len)
{
uint16_t last_idx = vq->last_used_idx & (vq->qsize - 1);
SPDK_DEBUGLOG(vfu_virtio,
"Queue %u - USED RING: last_idx=%"PRIu16" req_idx=%"PRIu16" used_len=%"PRIu32"\n",
vq->id, last_idx, req_idx, used_len);
vq->used.used->ring[last_idx].id = req_idx;
vq->used.used->ring[last_idx].len = used_len;
vq->last_used_idx++;
spdk_smp_wmb();
*(volatile uint16_t *)&vq->used.used->idx = vq->last_used_idx;
vq->used_req_cnt++;
}
void
virtio_vq_used_ring_packed_enqueue(struct vfu_virtio_vq *vq, uint16_t buffer_id, uint32_t num_descs,
uint32_t used_len)
{
struct vring_packed_desc *desc = &vq->desc.desc_packed[vq->last_used_idx];
SPDK_DEBUGLOG(vfu_virtio,
"Queue %u - USED RING: buffer_id=%"PRIu16" num_descs=%u used_len=%"PRIu32"\n",
vq->id, buffer_id, num_descs, used_len);
if (spdk_unlikely(virtio_vring_packed_is_used(desc, vq->packed.used_phase))) {
SPDK_ERRLOG("descriptor has been used before\n");
return;
}
/* In a used descriptor, addr is unused and len specifies the length
 * of the buffer that was written to by the device.
 */
desc->addr = 0;
desc->len = used_len;
/* This bit specifies whether any data has been written by the device */
if (used_len != 0) {
desc->flags |= VRING_DESC_F_WRITE;
}
/* Buffer ID is included in the last descriptor in the list.
* The driver needs to keep track of the size of the list corresponding
* to each buffer ID.
*/
desc->id = buffer_id;
/* A device MUST NOT make the descriptor used before buffer_id is
* written to the descriptor.
*/
spdk_smp_wmb();
/* To mark a desc as used, the device sets the F_USED bit in flags to match
* the internal Device ring wrap counter. It also sets the F_AVAIL bit to
* match the same value.
*/
if (vq->packed.used_phase) {
desc->flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
desc->flags |= (1 << VRING_PACKED_DESC_F_USED);
} else {
desc->flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
desc->flags &= ~(1 << VRING_PACKED_DESC_F_USED);
}
vq->last_used_idx += num_descs;
if (vq->last_used_idx >= vq->qsize) {
vq->last_used_idx -= vq->qsize;
vq->packed.used_phase = !vq->packed.used_phase;
}
vq->used_req_cnt++;
}
static int
vfu_virtio_vq_post_irq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
vfu_ctx_t *vfu_ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint);
vq->used_req_cnt = 0;
if (spdk_vfu_endpoint_msix_enabled(virtio_endpoint->endpoint)) {
SPDK_DEBUGLOG(vfu_virtio, "%s: Queue %u post MSIX IV %u\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
vq->id, vq->vector);
return vfu_irq_trigger(vfu_ctx, vq->vector);
} else {
if (!spdk_vfu_endpoint_intx_enabled(virtio_endpoint->endpoint)) {
SPDK_DEBUGLOG(vfu_virtio, "%s: IRQ disabled\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint));
return 0;
}
SPDK_DEBUGLOG(vfu_virtio, "%s: Queue %u post ISR\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), vq->id);
dev->cfg.isr = 1;
return vfu_irq_trigger(vfu_ctx, 0);
}
}
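
/*
 * Post a completion interrupt for a vq, honoring event suppression and
 * the configured interrupt coalescing delay: with coalescing enabled,
 * an IRQ is posted at most once per coalescing_delay_us interval.
 */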
void
vfu_virtio_vq_flush_irq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
uint32_t delay_us;
if (vq->used_req_cnt == 0) {
return;
}
/* No need to notify client */
if (virtio_queue_event_is_suppressed(dev, vq)) {
return;
}
/* Interrupt coalescing disabled */
if (!virtio_endpoint->coalescing_delay_us) {
vfu_virtio_vq_post_irq(dev, vq);
return;
}
/* No need for event right now */
if (spdk_get_ticks() < vq->next_event_time) {
return;
}
vfu_virtio_vq_post_irq(dev, vq);
delay_us = virtio_endpoint->coalescing_delay_us;
vq->next_event_time = spdk_get_ticks() + delay_us * spdk_get_ticks_hz() / (1000000ULL);
}
int
vfu_virito_dev_process_split_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
struct vfu_virtio_req *req;
uint16_t reqs_idx[VIRTIO_DEV_VRING_MAX_REQS];
uint16_t reqs_cnt, i;
int ret;
reqs_cnt = virtio_dev_split_get_avail_reqs(dev, vq, reqs_idx, VIRTIO_DEV_VRING_MAX_REQS);
if (!reqs_cnt) {
return 0;
}
SPDK_DEBUGLOG(vfu_virtio, "%s: get %u descriptors\n", dev->name, reqs_cnt);
for (i = 0; i < reqs_cnt; i++) {
req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
if (spdk_unlikely(!req)) {
SPDK_ERRLOG("Error to get request\n");
/* TODO: address the error case */
return -EIO;
}
req->req_idx = reqs_idx[i];
ret = virtio_dev_split_iovs_setup(dev, vq, req->req_idx, req);
if (spdk_unlikely(ret)) {
/* let the device respond to this error */
SPDK_ERRLOG("Split vring setup failed with index %u\n", i);
}
assert(virtio_endpoint->virtio_ops.exec_request);
virtio_endpoint->io_outstanding++;
virtio_endpoint->virtio_ops.exec_request(virtio_endpoint, vq, req);
}
return i;
}
struct vfu_virtio_req *
virito_dev_split_ring_get_next_avail_req(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
struct vfu_virtio_req *req;
uint16_t reqs_idx[VIRTIO_DEV_VRING_MAX_REQS];
uint16_t reqs_cnt;
int ret;
reqs_cnt = virtio_dev_split_get_avail_reqs(dev, vq, reqs_idx, 1);
if (!reqs_cnt) {
return NULL;
}
assert(reqs_cnt == 1);
SPDK_DEBUGLOG(vfu_virtio, "%s: get 1 descriptors\n", dev->name);
req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
if (!req) {
SPDK_ERRLOG("Error to get request\n");
return NULL;
}
req->req_idx = reqs_idx[0];
ret = virtio_dev_split_iovs_setup(dev, vq, req->req_idx, req);
if (ret) {
SPDK_ERRLOG("Split vring setup failed\n");
vfu_virtio_dev_put_req(req);
return NULL;
}
return req;
}
static inline void *
virtio_vring_packed_desc_to_iov(struct vfu_virtio_dev *dev, struct vring_packed_desc *desc,
dma_sg_t *sg, struct iovec *iov)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
return spdk_vfu_map_one(virtio_endpoint->endpoint, desc->addr, desc->len,
sg, iov, PROT_READ | PROT_WRITE);
}
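
/*
 * Walk a packed-ring descriptor chain (direct or indirect) starting at
 * last_avail_idx and translate every descriptor into an iovec. For an
 * indirect chain the buffer ID comes from the top-level descriptor,
 * otherwise it is taken from the last descriptor in the chain.
 */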
static int
virtio_dev_packed_iovs_setup(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq,
uint16_t last_avail_idx,
struct vring_packed_desc *current_desc, struct vfu_virtio_req *req)
{
struct vring_packed_desc *desc, *desc_table = NULL;
uint16_t new_idx, num_descs, desc_table_size = 0;
uint32_t len = 0;
SPDK_DEBUGLOG(vfu_virtio, "%s: last avail idx %u, req %p\n", dev->name, last_avail_idx, req);
desc = NULL;
num_descs = 1;
if (virtio_vring_packed_desc_is_indirect(current_desc)) {
req->buffer_id = current_desc->id;
desc_table = virtio_vring_packed_desc_to_iov(dev, current_desc, req->indirect_sg,
req->indirect_iov);
if (spdk_unlikely(desc_table == NULL)) {
SPDK_ERRLOG("Map Indirect Desc to IOV failed\n");
return -EINVAL;
}
desc_table_size = current_desc->len / sizeof(struct vring_packed_desc);
desc = desc_table;
SPDK_DEBUGLOG(vfu_virtio, "%s: indirect desc %p, desc size %u, req %p\n",
dev->name, desc_table, desc_table_size, req);
} else {
desc = current_desc;
}
assert(req->iovcnt == 0);
/* Map descs to IOVs */
new_idx = last_avail_idx;
while (1) {
assert(desc != NULL);
if (spdk_unlikely(req->iovcnt == VIRTIO_DEV_MAX_IOVS)) {
SPDK_ERRLOG("Max IOVs in request reached (iovcnt = %d).\n", req->iovcnt);
return -EINVAL;
}
if (spdk_unlikely(!virtio_vring_packed_desc_to_iov(dev, desc, virtio_req_to_sg_t(req, req->iovcnt),
&req->iovs[req->iovcnt]))) {
SPDK_ERRLOG("Map Desc to IOV failed (iovcnt = %d).\n", req->iovcnt);
return -EINVAL;
}
req->desc_writeable[req->iovcnt] = false;
if (virtio_vring_packed_desc_is_wr(desc)) {
req->desc_writeable[req->iovcnt] = true;
}
req->iovcnt++;
len += desc->len;
/* get next desc */
if (desc_table) {
if (req->iovcnt < desc_table_size) {
desc = &desc_table[req->iovcnt];
} else {
desc = NULL;
}
} else {
if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
req->buffer_id = desc->id;
desc = NULL;
} else {
new_idx = (new_idx + 1) % vq->qsize;
desc = &vq->desc.desc_packed[new_idx];
num_descs++;
req->buffer_id = desc->id;
}
}
if (desc == NULL) {
break;
}
}
req->num_descs = num_descs;
vq->last_avail_idx = (new_idx + 1) % vq->qsize;
if (vq->last_avail_idx < last_avail_idx) {
vq->packed.avail_phase = !vq->packed.avail_phase;
}
req->payload_size = len;
SPDK_DEBUGLOG(vfu_virtio, "%s: req %p, iovcnt %u, num_descs %u\n",
dev->name, req, req->iovcnt, num_descs);
return 0;
}
int
vfu_virito_dev_process_packed_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
struct vring_packed_desc *desc;
int ret;
struct vfu_virtio_req *req;
uint16_t i, max_reqs;
max_reqs = VIRTIO_DEV_VRING_MAX_REQS;
for (i = 0; i < max_reqs; i++) {
desc = &vq->desc.desc_packed[vq->last_avail_idx];
if (!virtio_vring_packed_is_avail(desc, vq->packed.avail_phase)) {
return i;
}
req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
if (spdk_unlikely(!req)) {
SPDK_ERRLOG("Error to get request\n");
/* TODO: address the error case */
assert(false);
return -EIO;
}
ret = virtio_dev_packed_iovs_setup(dev, vq, vq->last_avail_idx, desc, req);
if (spdk_unlikely(ret)) {
/* let the device respond to the error */
SPDK_ERRLOG("virtio_dev_packed_iovs_setup failed\n");
}
assert(virtio_endpoint->virtio_ops.exec_request);
virtio_endpoint->io_outstanding++;
virtio_endpoint->virtio_ops.exec_request(virtio_endpoint, vq, req);
}
return i;
}
struct vfu_virtio_req *
virito_dev_packed_ring_get_next_avail_req(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
struct vring_packed_desc *desc;
int ret;
struct vfu_virtio_req *req;
desc = &vq->desc.desc_packed[vq->last_avail_idx];
if (!virtio_vring_packed_is_avail(desc, vq->packed.avail_phase)) {
return NULL;
}
SPDK_DEBUGLOG(vfu_virtio, "%s: get 1 descriptors\n", dev->name);
req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
if (!req) {
SPDK_ERRLOG("Error to get request\n");
return NULL;
}
ret = virtio_dev_packed_iovs_setup(dev, vq, vq->last_avail_idx, desc, req);
if (ret) {
SPDK_ERRLOG("virtio_dev_packed_iovs_setup failed\n");
vfu_virtio_dev_put_req(req);
return NULL;
}
return req;
}
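
/*
 * Emulate the virtio-pci common configuration structure in BAR4.
 * Accesses arrive as vfio-user region reads/writes of at most 4 bytes;
 * writes to the feature, status and queue registers drive the device
 * state machine implemented above.
 */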
static int
virtio_vfu_pci_common_cfg(struct vfu_virtio_endpoint *virtio_endpoint, char *buf,
size_t count, loff_t pos, bool is_write)
{
struct vfu_virtio_dev *dev = virtio_endpoint->dev;
uint32_t offset, value = 0;
int ret;
assert(count <= 4);
offset = pos - VIRTIO_PCI_COMMON_CFG_OFFSET;
if (is_write) {
memcpy(&value, buf, count);
switch (offset) {
case VIRTIO_PCI_COMMON_DFSELECT:
dev->cfg.host_feature_select = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_DFSELECT with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_GFSELECT:
dev->cfg.guest_feature_select = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GFSELECT with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_GF:
assert(dev->cfg.guest_feature_select <= 1);
if (dev->cfg.guest_feature_select) {
dev->cfg.guest_feat_hi = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GF_HI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
} else {
dev->cfg.guest_feat_lo = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GF_LO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
}
ret = virtio_dev_set_features(dev,
(((uint64_t)dev->cfg.guest_feat_hi << 32) | dev->cfg.guest_feat_lo));
if (ret) {
return ret;
}
break;
case VIRTIO_PCI_COMMON_MSIX:
dev->cfg.msix_config = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_MSIX with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_STATUS:
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_STATUS with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
ret = virtio_dev_set_status(dev, value);
if (ret) {
return ret;
}
break;
case VIRTIO_PCI_COMMON_Q_SELECT:
if (value < VIRTIO_DEV_MAX_VQS) {
dev->cfg.queue_select = value;
}
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_SELECT with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_Q_SIZE:
dev->vqs[dev->cfg.queue_select].qsize = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_SIZE with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_Q_MSIX:
dev->vqs[dev->cfg.queue_select].vector = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_MSIX with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_Q_ENABLE:
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_ENABLE with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
if (value == 1) {
ret = virtio_dev_enable_vq(dev, dev->cfg.queue_select);
if (ret) {
return ret;
}
} else {
ret = virtio_dev_disable_vq(dev, dev->cfg.queue_select);
if (ret) {
return ret;
}
}
break;
case VIRTIO_PCI_COMMON_Q_DESCLO:
dev->vqs[dev->cfg.queue_select].desc_lo = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_DESCLO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_DESCHI:
dev->vqs[dev->cfg.queue_select].desc_hi = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_DESCHI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_AVAILLO:
dev->vqs[dev->cfg.queue_select].avail_lo = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_AVAILLO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_AVAILHI:
dev->vqs[dev->cfg.queue_select].avail_hi = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_AVAILHI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_USEDLO:
dev->vqs[dev->cfg.queue_select].used_lo = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_USEDLO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_USEDHI:
dev->vqs[dev->cfg.queue_select].used_hi = value;
SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_USEDHI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
default:
SPDK_ERRLOG("%s: WRITE UNSUPPORTED offset 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), offset);
errno = EIO;
return -1;
}
} else {
switch (offset) {
case VIRTIO_PCI_COMMON_DFSELECT:
value = dev->cfg.host_feature_select;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DFSELECT with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_DF:
assert(dev->cfg.host_feature_select <= 1);
if (dev->cfg.host_feature_select) {
value = dev->host_features >> 32;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DF_HI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
} else {
value = dev->host_features;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DF_LO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
}
break;
case VIRTIO_PCI_COMMON_GFSELECT:
value = dev->cfg.guest_feature_select;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GFSELECT with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_GF:
assert(dev->cfg.guest_feature_select <= 1);
if (dev->cfg.guest_feature_select) {
value = dev->cfg.guest_feat_hi;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GF_HI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
} else {
value = dev->cfg.guest_feat_lo;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GF_LO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
}
break;
case VIRTIO_PCI_COMMON_MSIX:
value = dev->cfg.msix_config;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_MSIX with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_NUMQ:
value = dev->num_queues;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_NUMQ with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_STATUS:
value = dev->cfg.device_status;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_STATUS with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_CFGGENERATION:
value = dev->cfg.config_generation;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_CFGGENERATION with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_Q_NOFF:
value = dev->cfg.queue_select;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_Q_NOFF with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_Q_SELECT:
value = dev->cfg.queue_select;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_Q_SELECT with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
value);
break;
case VIRTIO_PCI_COMMON_Q_SIZE:
value = dev->vqs[dev->cfg.queue_select].qsize;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_SIZE with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_MSIX:
value = dev->vqs[dev->cfg.queue_select].vector;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_MSIX with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_ENABLE:
value = dev->vqs[dev->cfg.queue_select].enabled;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_ENABLE with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_DESCLO:
value = dev->vqs[dev->cfg.queue_select].desc_lo;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_DESCLO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_DESCHI:
value = dev->vqs[dev->cfg.queue_select].desc_hi;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_DESCHI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_AVAILLO:
value = dev->vqs[dev->cfg.queue_select].avail_lo;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_AVAILLO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_AVAILHI:
value = dev->vqs[dev->cfg.queue_select].avail_hi;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_AVAILHI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_USEDLO:
value = dev->vqs[dev->cfg.queue_select].used_lo;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_USEDLO with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
case VIRTIO_PCI_COMMON_Q_USEDHI:
value = dev->vqs[dev->cfg.queue_select].used_hi;
SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_USEDHI with 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
break;
default:
SPDK_ERRLOG("%s: READ UNSUPPORTED offset 0x%x\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), offset);
errno = EIO;
return -1;
}
memcpy(buf, &value, count);
}
return count;
}
static int
virtio_vfu_device_specific_cfg(struct vfu_virtio_endpoint *virtio_endpoint, char *buf,
size_t count, loff_t pos, bool is_write)
{
loff_t offset;
int ret = -1;
assert(count <= 8);
offset = pos - VIRTIO_PCI_SPECIFIC_CFG_OFFSET;
if (!is_write) {
if (virtio_endpoint->virtio_ops.get_config) {
ret = virtio_endpoint->virtio_ops.get_config(virtio_endpoint, buf, offset, count);
}
} else {
if (virtio_endpoint->virtio_ops.set_config) {
ret = virtio_endpoint->virtio_ops.set_config(virtio_endpoint, buf, offset, count);
}
}
if (ret < 0) {
return ret;
}
return count;
}
static int
virtio_vfu_pci_isr(struct vfu_virtio_endpoint *virtio_endpoint, char *buf,
size_t count, bool is_write)
{
uint8_t *isr;
if (count != 1) {
SPDK_ERRLOG("ISR register is 1 byte\n");
errno = EIO;
return -1;
}
isr = buf;
if (!is_write) {
SPDK_DEBUGLOG(vfu_virtio, "READ PCI ISR\n");
/* Read-Acknowledge Clear */
*isr = virtio_endpoint->dev->cfg.isr;
virtio_endpoint->dev->cfg.isr = 0;
} else {
SPDK_ERRLOG("ISR register is RO\n");
errno = EIO;
return -1;
}
return count;
}
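
/*
 * BAR4 access dispatcher. The BAR is divided into the common config,
 * ISR, device-specific config and notification areas; notifications are
 * exposed through a sparse mmap region, so no MMIO message is expected
 * for them here.
 */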
static ssize_t
virtio_vfu_access_bar4(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
loff_t pos,
bool is_write)
{
struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
uint64_t start, end;
start = pos;
end = start + count;
SPDK_DEBUGLOG(vfu_virtio, "%s: %s bar4 0x%"PRIX64"-0x%"PRIX64", len = %lu\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
is_write ? "write" : "read", start, end - 1, count);
if (end < VIRTIO_PCI_COMMON_CFG_OFFSET + VIRTIO_PCI_COMMON_CFG_LENGTH) {
/* virtio PCI common configuration */
return virtio_vfu_pci_common_cfg(virtio_endpoint, buf, count, pos, is_write);
} else if (start >= VIRTIO_PCI_ISR_ACCESS_OFFSET &&
end < VIRTIO_PCI_ISR_ACCESS_OFFSET + VIRTIO_PCI_ISR_ACCESS_LENGTH) {
/* ISR access */
return virtio_vfu_pci_isr(virtio_endpoint, buf, count, is_write);
} else if (start >= VIRTIO_PCI_SPECIFIC_CFG_OFFSET &&
end < VIRTIO_PCI_SPECIFIC_CFG_OFFSET + VIRTIO_PCI_SPECIFIC_CFG_LENGTH) {
/* Device specific configuration */
return virtio_vfu_device_specific_cfg(virtio_endpoint, buf, count, pos, is_write);
} else if (start >= VIRTIO_PCI_NOTIFICATIONS_OFFSET &&
end < VIRTIO_PCI_NOTIFICATIONS_OFFSET + VIRTIO_PCI_NOTIFICATIONS_LENGTH) {
/* Notifications */
/* Sparse mmap region by default, there are no MMIO R/W messages */
assert(false);
return count;
} else {
assert(false);
}
return 0;
}
int
vfu_virtio_post_memory_add(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end)
{
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
struct vfu_virtio_dev *dev = virtio_endpoint->dev;
uint32_t i;
if (!dev) {
return 0;
}
for (i = 0; i < dev->num_queues; i++) {
/* Try to remap VQs if necessary */
virtio_dev_map_vq(dev, &dev->vqs[i]);
}
return 0;
}
int
vfu_virtio_pre_memory_remove(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end)
{
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
if (virtio_endpoint->dev != NULL) {
vfu_virtio_dev_unmap_vqs(virtio_endpoint->dev, map_start, map_end);
}
return 0;
}
int
vfu_virtio_pci_reset_cb(struct spdk_vfu_endpoint *endpoint)
{
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
if (virtio_endpoint->dev) {
vfu_virtio_dev_stop(virtio_endpoint->dev);
vfu_virtio_dev_reset(virtio_endpoint->dev);
}
return 0;
}
static ssize_t
access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset,
bool is_write)
{
struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);
void *pci_config = spdk_vfu_endpoint_get_pci_config(endpoint);
SPDK_DEBUGLOG(vfu_virtio,
"%s: PCI_CFG %s %#lx-%#lx\n",
spdk_vfu_get_endpoint_id(endpoint), is_write ? "write" : "read",
offset, offset + count);
if (is_write) {
SPDK_ERRLOG("write %#lx-%#lx not supported\n",
offset, offset + count);
errno = EINVAL;
return -1;
}
if (offset + count > 0x1000) {
SPDK_ERRLOG("access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n",
offset, count, 0x1000);
errno = ERANGE;
return -1;
}
memcpy(buf, ((unsigned char *)pci_config) + offset, count);
return count;
}
static int
vfu_virtio_dev_start(struct vfu_virtio_dev *dev)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
int ret = 0;
SPDK_DEBUGLOG(vfu_virtio, "start %s\n", dev->name);
if (virtio_dev_is_started(dev)) {
SPDK_ERRLOG("Device %s is already started\n", dev->name);
return -EFAULT;
}
if (virtio_endpoint->virtio_ops.start_device) {
virtio_endpoint->io_outstanding = 0;
ret = virtio_endpoint->virtio_ops.start_device(virtio_endpoint);
}
SPDK_DEBUGLOG(vfu_virtio, "%s is started with ret %d\n", dev->name, ret);
return ret;
}
static int
vfu_virtio_dev_stop(struct vfu_virtio_dev *dev)
{
struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
int ret = 0;
SPDK_DEBUGLOG(vfu_virtio, "stop %s\n", dev->name);
if (!virtio_dev_is_started(dev)) {
SPDK_DEBUGLOG(vfu_virtio, "%s isn't started\n", dev->name);
return 0;
}
if (virtio_endpoint->virtio_ops.stop_device) {
ret = virtio_endpoint->virtio_ops.stop_device(virtio_endpoint);
assert(ret == 0);
}
/* Unmap all VQs */
vfu_virtio_dev_unmap_vqs(dev, NULL, NULL);
return ret;
}
int
vfu_virtio_detach_device(struct spdk_vfu_endpoint *endpoint)
{
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
struct vfu_virtio_dev *dev = virtio_endpoint->dev;
if (virtio_endpoint->dev == NULL) {
return 0;
}
SPDK_DEBUGLOG(vfu_virtio, "detach device %s\n", dev->name);
vfu_virtio_dev_stop(dev);
vfu_virtio_dev_free_reqs(virtio_endpoint, dev);
virtio_endpoint->dev = NULL;
free(dev);
return 0;
}
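
/*
 * Allocate and initialize the virtio device for an endpoint. The
 * allocation reserves three dma_sg_t entries per queue (for the
 * descriptor, available and used ring mappings) directly behind
 * struct vfu_virtio_dev, and prefills each queue's free request list.
 */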
int
vfu_virtio_attach_device(struct spdk_vfu_endpoint *endpoint)
{
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
uint64_t supported_features = 0;
struct vfu_virtio_dev *dev;
struct vfu_virtio_vq *vq;
struct vfu_virtio_req *req;
uint32_t i, j;
int ret = 0;
dev = calloc(1, sizeof(*dev) + virtio_endpoint->num_queues * 3 * dma_sg_size());
if (dev == NULL) {
return -ENOMEM;
}
dev->num_queues = virtio_endpoint->num_queues;
for (i = 0; i < dev->num_queues; i++) {
vq = &dev->vqs[i];
vq->id = i;
vq->qsize = virtio_endpoint->qsize;
vq->avail.sg = (dma_sg_t *)(dev->sg + i * dma_sg_size() * 3);
vq->used.sg = (dma_sg_t *)((uint8_t *)vq->avail.sg + dma_sg_size());
vq->desc.sg = (dma_sg_t *)((uint8_t *)vq->used.sg + dma_sg_size());
STAILQ_INIT(&vq->free_reqs);
for (j = 0; j <= vq->qsize; j++) {
req = vfu_virtio_vq_alloc_req(virtio_endpoint, vq);
if (!req) {
SPDK_ERRLOG("Error to allocate req\n");
ret = -ENOMEM;
goto out;
}
req->indirect_iov = &req->iovs[VIRTIO_DEV_MAX_IOVS];
req->indirect_sg = virtio_req_to_sg_t(req, VIRTIO_DEV_MAX_IOVS);
req->dev = dev;
req->vq = vq;
STAILQ_INSERT_TAIL(&vq->free_reqs, req, link);
}
}
if (virtio_endpoint->virtio_ops.get_device_features) {
supported_features = virtio_endpoint->virtio_ops.get_device_features(virtio_endpoint);
}
dev->host_features = supported_features;
snprintf(dev->name, SPDK_VFU_MAX_NAME_LEN, "%s",
spdk_vfu_get_endpoint_name(virtio_endpoint->endpoint));
virtio_endpoint->dev = dev;
dev->virtio_endpoint = virtio_endpoint;
virtio_endpoint->thread = spdk_get_thread();
return 0;
out:
vfu_virtio_dev_free_reqs(virtio_endpoint, dev);
return ret;
}
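
/*
 * Create the backing file for BAR4 and mmap its notification area. The
 * file descriptor is later handed to the client via
 * vfu_virtio_get_device_info(), so the client can mmap the doorbells
 * directly instead of trapping on every queue notification.
 */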
int
vfu_virtio_endpoint_setup(struct vfu_virtio_endpoint *virtio_endpoint,
struct spdk_vfu_endpoint *endpoint,
char *basename, const char *endpoint_name,
struct vfu_virtio_ops *ops)
{
char path[PATH_MAX] = "";
int ret;
if (!ops) {
return -EINVAL;
}
ret = snprintf(path, PATH_MAX, "%s%s_bar4", basename, endpoint_name);
if (ret < 0 || ret >= PATH_MAX) {
SPDK_ERRLOG("%s: error to get socket path: %s.\n", basename, spdk_strerror(errno));
return -EINVAL;
}
ret = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
if (ret == -1) {
SPDK_ERRLOG("%s: failed to open device memory at %s.\n",
path, spdk_strerror(errno));
return ret;
}
unlink(path);
virtio_endpoint->devmem_fd = ret;
ret = ftruncate(virtio_endpoint->devmem_fd, VIRTIO_PCI_BAR4_LENGTH);
if (ret != 0) {
SPDK_ERRLOG("%s: error to ftruncate file %s.\n", path,
spdk_strerror(errno));
close(virtio_endpoint->devmem_fd);
return ret;
}
virtio_endpoint->doorbells = mmap(NULL, VIRTIO_PCI_NOTIFICATIONS_LENGTH, PROT_READ | PROT_WRITE,
MAP_SHARED,
virtio_endpoint->devmem_fd, VIRTIO_PCI_NOTIFICATIONS_OFFSET);
if (virtio_endpoint->doorbells == MAP_FAILED) {
SPDK_ERRLOG("%s: error to mmap file %s.\n", path, spdk_strerror(errno));
close(virtio_endpoint->devmem_fd);
return -EFAULT;
}
virtio_endpoint->endpoint = endpoint;
virtio_endpoint->virtio_ops = *ops;
virtio_endpoint->num_queues = VIRTIO_DEV_MAX_VQS;
virtio_endpoint->qsize = VIRTIO_VQ_DEFAULT_SIZE;
SPDK_DEBUGLOG(vfu_virtio, "mmap file %s, devmem_fd %d\n", path, virtio_endpoint->devmem_fd);
return 0;
}
int
vfu_virtio_endpoint_destruct(struct vfu_virtio_endpoint *virtio_endpoint)
{
if (virtio_endpoint->doorbells) {
munmap((void *)virtio_endpoint->doorbells, VIRTIO_PCI_NOTIFICATIONS_LENGTH);
}
if (virtio_endpoint->devmem_fd) {
close(virtio_endpoint->devmem_fd);
}
return 0;
}
static int
vfu_virtio_quiesce_poll(void *ctx)
{
struct vfu_virtio_endpoint *virtio_endpoint = ctx;
vfu_ctx_t *vfu_ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint);
if (virtio_endpoint->io_outstanding) {
return SPDK_POLLER_IDLE;
}
spdk_poller_unregister(&virtio_endpoint->quiesce_poller);
virtio_endpoint->quiesce_in_progress = false;
vfu_device_quiesced(vfu_ctx, 0);
return SPDK_POLLER_BUSY;
}
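
/*
 * Quiesce callback from the transport: if I/O is still outstanding,
 * return -EBUSY and start a poller that calls vfu_device_quiesced()
 * once the outstanding count drains to zero.
 */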
int
vfu_virtio_quiesce_cb(struct spdk_vfu_endpoint *endpoint)
{
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
if (virtio_endpoint->quiesce_in_progress) {
return -EBUSY;
}
if (!virtio_endpoint->io_outstanding) {
return 0;
}
virtio_endpoint->quiesce_in_progress = true;
virtio_endpoint->quiesce_poller = SPDK_POLLER_REGISTER(vfu_virtio_quiesce_poll, virtio_endpoint,
10);
return -EBUSY;
}
static struct spdk_vfu_pci_device vfu_virtio_device_info = {
.id = {
.vid = SPDK_PCI_VID_VIRTIO,
/* Filled in with the real device ID when the device information is queried */
.did = 0x0,
.ssvid = SPDK_PCI_VID_VIRTIO,
.ssid = 0x0,
},
.class = {
/* 0x01, mass storage controller */
.bcc = 0x01,
/* 0x00, SCSI controller */
.scc = 0x00,
/* 0x00, SCSI controller - vendor specific interface */
.pi = 0x00,
},
.pmcap = {
.hdr.id = PCI_CAP_ID_PM,
.pmcs.nsfrst = 0x1,
},
.pxcap = {
.hdr.id = PCI_CAP_ID_EXP,
.pxcaps.ver = 0x2,
.pxdcap = {.rer = 0x1, .flrc = 0x1},
.pxdcap2.ctds = 0x1,
},
.msixcap = {
.hdr.id = PCI_CAP_ID_MSIX,
.mxc.ts = VIRTIO_DEV_MAX_VQS - 1,
.mtab = {.tbir = 0x1, .to = 0x0},
.mpba = {.pbir = 0x2, .pbao = 0x0},
},
.nr_vendor_caps = 4,
.intr_ipin = 0x1,
.nr_int_irqs = 0x1,
.nr_msix_irqs = VIRTIO_DEV_MAX_VQS,
.regions = {
/* BAR0 */
{0},
/* BAR1 */
{
.access_cb = NULL,
.offset = 0,
.fd = -1,
.len = 0x1000,
.flags = VFU_REGION_FLAG_RW,
.nr_sparse_mmaps = 0,
},
/* BAR2 */
{
.access_cb = NULL,
.offset = 0,
.fd = -1,
.len = 0x1000,
.flags = VFU_REGION_FLAG_RW,
.nr_sparse_mmaps = 0,
},
/* BAR3 */
{0},
/* BAR4 */
{
.access_cb = virtio_vfu_access_bar4,
.offset = 0,
.fd = -1,
.len = VIRTIO_PCI_BAR4_LENGTH,
.flags = VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM,
.nr_sparse_mmaps = 1,
.mmaps = {
{
.offset = VIRTIO_PCI_NOTIFICATIONS_OFFSET,
.len = VIRTIO_PCI_NOTIFICATIONS_LENGTH,
},
},
},
/* BAR5 */
{0},
/* BAR6 */
{0},
/* ROM */
{0},
/* PCI Config */
{
.access_cb = access_pci_config,
.offset = 0,
.fd = -1,
.len = 0x1000,
.flags = VFU_REGION_FLAG_RW,
.nr_sparse_mmaps = 0,
},
},
};
void
vfu_virtio_get_device_info(struct vfu_virtio_endpoint *virtio_endpoint,
struct spdk_vfu_pci_device *device_info)
{
memcpy(device_info, &vfu_virtio_device_info, sizeof(*device_info));
/* BAR4 Region FD */
device_info->regions[VFU_PCI_DEV_BAR4_REGION_IDX].fd = virtio_endpoint->devmem_fd;
SPDK_DEBUGLOG(vfu_virtio, "%s: get device information, fd %d\n",
spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
virtio_endpoint->devmem_fd);
}
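
/*
 * Vendor-specific PCI capabilities describing where each virtio
 * structure lives inside BAR4; they are returned one at a time by
 * vfu_virtio_get_vendor_capability() below.
 */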
static struct virtio_pci_cap common_cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(common_cap),
.cfg_type = VIRTIO_PCI_CAP_COMMON_CFG,
.bar = 4,
.offset = VIRTIO_PCI_COMMON_CFG_OFFSET,
.length = VIRTIO_PCI_COMMON_CFG_LENGTH,
};
static struct virtio_pci_cap isr_cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(isr_cap),
.cfg_type = VIRTIO_PCI_CAP_ISR_CFG,
.bar = 4,
.offset = VIRTIO_PCI_ISR_ACCESS_OFFSET,
.length = VIRTIO_PCI_ISR_ACCESS_LENGTH,
};
static struct virtio_pci_cap dev_cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(dev_cap),
.cfg_type = VIRTIO_PCI_CAP_DEVICE_CFG,
.bar = 4,
.offset = VIRTIO_PCI_SPECIFIC_CFG_OFFSET,
.length = VIRTIO_PCI_SPECIFIC_CFG_LENGTH,
};
static struct virtio_pci_notify_cap notify_cap = {
.cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(notify_cap),
.cfg_type = VIRTIO_PCI_CAP_NOTIFY_CFG,
.bar = 4,
.offset = VIRTIO_PCI_NOTIFICATIONS_OFFSET,
.length = VIRTIO_PCI_NOTIFICATIONS_LENGTH,
},
.notify_off_multiplier = 4,
};
uint16_t
vfu_virtio_get_vendor_capability(struct spdk_vfu_endpoint *endpoint, char *buf,
uint16_t buf_len,
uint16_t idx)
{
uint16_t len;
SPDK_DEBUGLOG(vfu_virtio, "%s: get vendor capability, idx %u\n",
spdk_vfu_get_endpoint_id(endpoint), idx);
switch (idx) {
case 0:
assert(buf_len > sizeof(common_cap));
memcpy(buf, &common_cap, sizeof(common_cap));
len = sizeof(common_cap);
break;
case 1:
assert(buf_len > sizeof(isr_cap));
memcpy(buf, &isr_cap, sizeof(isr_cap));
len = sizeof(isr_cap);
break;
case 2:
assert(buf_len > sizeof(dev_cap));
memcpy(buf, &dev_cap, sizeof(dev_cap));
len = sizeof(dev_cap);
break;
case 3:
assert(buf_len > sizeof(notify_cap));
memcpy(buf, &notify_cap, sizeof(notify_cap));
len = sizeof(notify_cap);
break;
default:
return 0;
}
return len;
}
SPDK_LOG_REGISTER_COMPONENT(vfu_virtio)