Spdk/module/vfu_device/vfu_virtio.c

1779 lines
48 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (C) 2022 Intel Corporation.
* All rights reserved.
*/
/*
* virtio over vfio-user common library
*/
#include "spdk/env.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/stdinc.h"
#include "spdk/assert.h"
#include "spdk/barrier.h"
#include "spdk/thread.h"
#include "spdk/memory.h"
#include "spdk/util.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "vfu_virtio_internal.h"
/* Forward declarations: virtio_dev_set_status() below calls both of these
 * before their definitions are seen. Being static, they are defined
 * elsewhere in this translation unit.
 */
static int vfu_virtio_dev_start(struct vfu_virtio_dev *dev);
static int vfu_virtio_dev_stop(struct vfu_virtio_dev *dev);
/* Drop the mapping held by one queue region (desc/avail/used), if there is
 * one, and mark the region as unmapped by clearing its address and length.
 */
static inline void
vfu_virtio_unmap_q(struct vfu_virtio_dev *dev, struct q_mapping *mapping)
{
	struct vfu_virtio_endpoint *vep = dev->virtio_endpoint;

	if (mapping->addr == NULL) {
		/* Region was never mapped (or already released) */
		return;
	}

	spdk_vfu_unmap_sg(vep->endpoint, mapping->sg, &mapping->iov, 1);
	mapping->addr = NULL;
	mapping->len = 0;
}
/* Map one queue region of `len` bytes at guest address `phys_addr`.
 *
 * Nothing is done (and 0 is returned) when the region is already mapped or
 * when either `len` or `phys_addr` is zero. Returns -EINVAL when the mapping
 * attempt itself fails; on success the mapping records the address, length
 * and local pointer.
 */
static inline int
vfu_virtio_map_q(struct vfu_virtio_dev *dev, struct q_mapping *mapping, uint64_t phys_addr,
		 uint64_t len)
{
	struct vfu_virtio_endpoint *vep = dev->virtio_endpoint;
	void *va;

	if (mapping->addr || !len || !phys_addr) {
		return 0;
	}

	va = spdk_vfu_map_one(vep->endpoint, phys_addr, len, mapping->sg, &mapping->iov,
			      PROT_READ | PROT_WRITE);
	if (va == NULL) {
		return -EINVAL;
	}

	mapping->phys_addr = phys_addr;
	mapping->len = len;
	mapping->addr = va;

	return 0;
}
/* Map the descriptor table, available ring and used ring of an enabled
 * queue and mark the queue VFU_VQ_ACTIVE.
 *
 * Returns 0 when the queue is disabled, already active, or was mapped
 * successfully. Returns a negative errno when any region fails to map or
 * when a region is left unmapped because its address/size is still zero;
 * in that case the regions handled here are released again and the queue
 * state is left untouched.
 */
static int
virtio_dev_map_vq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	int ret;
	uint64_t phys_addr, len;

	if (!vq->enabled || (vq->q_state == VFU_VQ_ACTIVE)) {
		return 0;
	}

	SPDK_DEBUGLOG(vfu_virtio, "%s: try to map vq %u\n", dev->name, vq->id);

	len = virtio_queue_desc_size(dev, vq);
	phys_addr = ((((uint64_t)vq->desc_hi) << 32) | vq->desc_lo);
	ret = vfu_virtio_map_q(dev, &vq->desc, phys_addr, len);
	if (ret) {
		SPDK_DEBUGLOG(vfu_virtio, "Error to map descs\n");
		return ret;
	}

	len = virtio_queue_avail_size(dev, vq);
	phys_addr = ((((uint64_t)vq->avail_hi) << 32) | vq->avail_lo);
	ret = vfu_virtio_map_q(dev, &vq->avail, phys_addr, len);
	if (ret) {
		vfu_virtio_unmap_q(dev, &vq->desc);
		SPDK_DEBUGLOG(vfu_virtio, "Error to map available ring\n");
		return ret;
	}

	len = virtio_queue_used_size(dev, vq);
	phys_addr = ((((uint64_t)vq->used_hi) << 32) | vq->used_lo);
	ret = vfu_virtio_map_q(dev, &vq->used, phys_addr, len);
	if (ret) {
		vfu_virtio_unmap_q(dev, &vq->desc);
		vfu_virtio_unmap_q(dev, &vq->avail);
		SPDK_DEBUGLOG(vfu_virtio, "Error to map used ring\n");
		return ret;
	}

	/* vfu_virtio_map_q() reports success without mapping anything when the
	 * guest left an address or the queue size at zero. Refuse to activate
	 * such a queue: the used ring is dereferenced right below.
	 */
	if (vq->desc.addr == NULL || vq->avail.addr == NULL || vq->used.addr == NULL) {
		vfu_virtio_unmap_q(dev, &vq->desc);
		vfu_virtio_unmap_q(dev, &vq->avail);
		vfu_virtio_unmap_q(dev, &vq->used);
		SPDK_DEBUGLOG(vfu_virtio, "%s: vq %u rings are not fully configured\n",
			      dev->name, vq->id);
		return -EINVAL;
	}

	/* We're running with polling mode */
	if (virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) {
		vq->used.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
	} else {
		vq->used.used->flags = VRING_USED_F_NO_NOTIFY;
	}

	SPDK_DEBUGLOG(vfu_virtio, "%s: map vq %u successfully\n", dev->name, vq->id);
	vq->q_state = VFU_VQ_ACTIVE;

	return 0;
}
/* Mark a queue inactive and release all three of its ring mappings. */
static void
virtio_dev_unmap_vq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	struct q_mapping *regions[] = { &vq->desc, &vq->avail, &vq->used };
	size_t k;

	SPDK_DEBUGLOG(vfu_virtio, "%s: unmap vq %u\n", dev->name, vq->id);
	vq->q_state = VFU_VQ_INACTIVE;

	for (k = 0; k < sizeof(regions) / sizeof(regions[0]); k++) {
		vfu_virtio_unmap_q(dev, regions[k]);
	}
}
static bool
vfu_virtio_vq_should_unmap(struct vfu_virtio_vq *vq, void *map_start, void *map_end)
{
/* always do unmap when stopping the device */
if (!map_start || !map_end) {
return true;
}
if (vq->desc.addr >= map_start && vq->desc.addr < map_end) {
return true;
}
if (vq->avail.addr >= map_start && vq->avail.addr < map_end) {
return true;
}
if (vq->used.addr >= map_start && vq->used.addr < map_end) {
return true;
}
return false;
}
/* Unmap every enabled queue of the device that vfu_virtio_vq_should_unmap()
 * selects for the given address window.
 */
static void
vfu_virtio_dev_unmap_vqs(struct vfu_virtio_dev *dev, void *map_start, void *map_end)
{
	struct vfu_virtio_vq *cur;
	uint32_t qid;

	for (qid = 0; qid < dev->num_queues; qid++) {
		cur = &dev->vqs[qid];

		if (cur->enabled && vfu_virtio_vq_should_unmap(cur, map_start, map_end)) {
			virtio_dev_unmap_vq(dev, cur);
		}
	}
}
/* This function is used to notify VM that the device
* configuration space has been changed.
*/
void
vfu_virtio_notify_config(struct vfu_virtio_endpoint *virtio_endpoint)
{
struct spdk_vfu_endpoint *endpoint = virtio_endpoint->endpoint;
if (virtio_endpoint->dev == NULL) {
return;
}
virtio_endpoint->dev->cfg.isr = 1;
virtio_endpoint->dev->cfg.config_generation++;
vfu_irq_trigger(spdk_vfu_get_vfu_ctx(endpoint), virtio_endpoint->dev->cfg.msix_config);
}
/* Put every queue back into its freshly-created state and wipe the virtio
 * PCI common configuration of the device.
 */
static void
vfu_virtio_dev_reset(struct vfu_virtio_dev *dev)
{
	struct vfu_virtio_vq *cur;
	uint32_t qid;

	SPDK_DEBUGLOG(vfu_virtio, "device %s resetting\n", dev->name);

	for (qid = 0; qid < dev->num_queues; qid++) {
		cur = &dev->vqs[qid];

		cur->enabled = false;
		cur->q_state = VFU_VQ_CREATED;
		cur->vector = 0;

		/* Ring positions */
		cur->last_avail_idx = 0;
		cur->last_used_idx = 0;

		/* Packed ring bookkeeping */
		cur->packed.packed_ring = false;
		cur->packed.avail_phase = 0;
		cur->packed.used_phase = 0;
	}

	memset(&dev->cfg, 0, sizeof(struct virtio_pci_cfg));
}
/* Apply a device status write from the driver.
 *
 * Starts the device when DRIVER_OK becomes set on a device that is not
 * started, stops it when DRIVER_OK is cleared on a started device, then
 * stores the new status. A status of 0 additionally resets the device.
 * Returns 0 on success or the start/stop error code, in which case the
 * stored status is left unchanged.
 */
static int
virtio_dev_set_status(struct vfu_virtio_dev *dev, uint8_t status)
{
	bool driver_ok = (status & VIRTIO_CONFIG_S_DRIVER_OK) != 0;
	int rc = 0;

	SPDK_DEBUGLOG(vfu_virtio, "device current status %x, set status %x\n", dev->cfg.device_status,
		      status);

	if (virtio_dev_is_started(dev)) {
		if (!driver_ok) {
			rc = vfu_virtio_dev_stop(dev);
		}
	} else if (driver_ok) {
		rc = vfu_virtio_dev_start(dev);
	}

	if (rc != 0) {
		SPDK_ERRLOG("Failed to start/stop device\n");
		return rc;
	}

	dev->cfg.device_status = status;

	if (status == 0) {
		vfu_virtio_dev_reset(dev);
	}

	return 0;
}
/* Record the feature bits chosen by the guest driver.
 *
 * Returns -EINVAL once FEATURES_OK is already set in the device status,
 * -ENOTSUP when the guest asks for a bit the host does not offer, and 0
 * after storing the features otherwise.
 */
static int
virtio_dev_set_features(struct vfu_virtio_dev *dev, uint64_t features)
{
	uint64_t unsupported;

	if (dev->cfg.device_status & VIRTIO_CONFIG_S_FEATURES_OK) {
		SPDK_ERRLOG("Feature negotiation has finished\n");
		return -EINVAL;
	}

	unsupported = features & ~dev->host_features;
	if (unsupported != 0) {
		SPDK_ERRLOG("Host features 0x%"PRIx64", guest features 0x%"PRIx64"\n",
			    dev->host_features, features);
		return -ENOTSUP;
	}

	SPDK_DEBUGLOG(vfu_virtio, "%s: negotiated features 0x%"PRIx64"\n", dev->name,
		      features);
	dev->cfg.guest_features = features;

	return 0;
}
/* Enable queue `qid`.
 *
 * Returns -EINVAL when the queue is already enabled. The queue is flagged
 * enabled before mapping is attempted; a mapping failure is logged and still
 * reported as 0, leaving the queue enabled but not active. After a
 * successful map the ring indices are zeroed and, when the guest negotiated
 * VIRTIO_F_RING_PACKED, the packed-ring phases are initialised to true.
 */
static int
virtio_dev_enable_vq(struct vfu_virtio_dev *dev, uint16_t qid)
{
	struct vfu_virtio_vq *target = &dev->vqs[qid];

	SPDK_DEBUGLOG(vfu_virtio, "%s: enable vq %u\n", dev->name, qid);

	if (target->enabled) {
		SPDK_ERRLOG("Queue %u is enabled\n", qid);
		return -EINVAL;
	}
	target->enabled = true;

	if (virtio_dev_map_vq(dev, target) != 0) {
		SPDK_ERRLOG("Queue %u failed to map\n", qid);
		return 0;
	}

	/* Start both rings from index 0 */
	target->avail.avail->idx = 0;
	target->last_avail_idx = 0;
	target->used.used->idx = 0;
	target->last_used_idx = 0;

	if (!virtio_guest_has_feature(dev, VIRTIO_F_RING_PACKED)) {
		return 0;
	}

	SPDK_DEBUGLOG(vfu_virtio, "%s: vq %u PACKED RING ENABLED\n", dev->name, qid);
	target->packed.packed_ring = true;
	target->packed.avail_phase = true;
	target->packed.used_phase = true;

	return 0;
}
/* Disable queue `qid`: unmap its rings and return all of its bookkeeping to
 * the freshly-created state. Disabling a queue that is not enabled only
 * logs a notice. Always returns 0.
 */
static int
virtio_dev_disable_vq(struct vfu_virtio_dev *dev, uint16_t qid)
{
	struct vfu_virtio_vq *target = &dev->vqs[qid];

	SPDK_DEBUGLOG(vfu_virtio, "%s: disable vq %u\n", dev->name, qid);

	if (!target->enabled) {
		SPDK_NOTICELOG("Queue %u isn't enabled\n", qid);
		return 0;
	}

	virtio_dev_unmap_vq(dev, target);

	target->enabled = false;
	target->q_state = VFU_VQ_CREATED;
	target->vector = 0;

	target->last_avail_idx = 0;
	target->last_used_idx = 0;

	target->packed.packed_ring = false;
	target->packed.avail_phase = 0;
	target->packed.used_phase = 0;

	return 0;
}
/* Collect up to `max_reqs` new head indices from the split available ring.
 *
 * Copies the heads into `reqs`, advances vq->last_avail_idx by the number
 * taken, and returns that number (0 when nothing new is available).
 */
static int
virtio_dev_split_get_avail_reqs(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq,
				uint16_t *reqs, uint16_t max_reqs)
{
	uint16_t start = vq->last_avail_idx;
	uint16_t guest_idx = vq->avail.avail->idx;
	uint16_t pending, n;

	/* Ring entries are read only after the index that publishes them */
	spdk_smp_rmb();

	/* 16-bit subtraction copes with index wrap-around */
	pending = guest_idx - start;
	if (pending == 0) {
		return 0;
	}

	pending = spdk_min(pending, max_reqs);
	vq->last_avail_idx += pending;

	for (n = 0; n < pending; n++) {
		reqs[n] = vq->avail.avail->ring[(start + n) & (vq->qsize - 1)];
	}

	SPDK_DEBUGLOG(vfu_virtio_io,
		      "AVAIL: vq %u last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
		      vq->id, start, guest_idx, pending);

	return pending;
}
/* Step `*desc` to the next descriptor of a split-ring chain.
 *
 * `*desc` becomes NULL when the chain ends (return 0) or when the next index
 * lies outside the table of `desc_table_size` entries (return -1). Otherwise
 * it points at the next entry of `desc_table` and 0 is returned.
 */
static int
virtio_vring_split_desc_get_next(struct vring_desc **desc,
				 struct vring_desc *desc_table,
				 uint32_t desc_table_size)
{
	struct vring_desc *cur = *desc;
	uint16_t next;

	/* Assume "no successor" until a valid one is found */
	*desc = NULL;

	if (!(cur->flags & VRING_DESC_F_NEXT)) {
		return 0;
	}

	next = cur->next;
	if (spdk_unlikely(next >= desc_table_size)) {
		return -1;
	}

	*desc = &desc_table[next];
	return 0;
}
/* Map the buffer described by a split-ring descriptor read/write into `iov`
 * (tracked by `sg`). Returns whatever spdk_vfu_map_one() returns; callers in
 * this file treat NULL as failure.
 */
static inline void *
virtio_vring_desc_to_iov(struct vfu_virtio_dev *dev, struct vring_desc *desc,
			 dma_sg_t *sg, struct iovec *iov)
{
	return spdk_vfu_map_one(dev->virtio_endpoint->endpoint, desc->addr, desc->len,
				sg, iov, PROT_READ | PROT_WRITE);
}
/* Resolve the head descriptor `desc_idx` of a split ring.
 *
 * For a direct descriptor, `*desc` points into the queue's own table and
 * `*desc_table`/`*desc_table_size` describe that table. For an indirect
 * descriptor the indirect table is mapped through `sg`/`iov`, and `*desc`
 * and `*desc_table` point at its first entry.
 *
 * Returns 0 on success. Returns -EINVAL (with `*desc` and `*desc_table` set
 * to NULL) when `desc_idx` is outside the queue, when an indirect table
 * holds no complete descriptor, or when mapping the indirect table fails.
 */
static int
virtio_split_vring_get_desc(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq, uint16_t desc_idx,
			    struct vring_desc **desc, struct vring_desc **desc_table,
			    uint32_t *desc_table_size,
			    dma_sg_t *sg, struct iovec *iov)
{
	struct vring_desc *head;

	/* desc_idx was read from the guest-written available ring; never index
	 * the descriptor table with it before range-checking.
	 */
	if (spdk_unlikely(desc_idx >= vq->qsize)) {
		*desc = NULL;
		*desc_table = NULL;
		return -EINVAL;
	}

	head = &vq->desc.desc[desc_idx];

	if (!virtio_vring_split_desc_is_indirect(head)) {
		*desc = head;
		*desc_table = vq->desc.desc;
		*desc_table_size = vq->qsize;
		return 0;
	}

	*desc_table_size = head->len / sizeof(struct vring_desc);
	/* A table shorter than one descriptor has no first entry to read */
	if (spdk_unlikely(*desc_table_size == 0)) {
		*desc = NULL;
		*desc_table = NULL;
		return -EINVAL;
	}

	*desc_table = virtio_vring_desc_to_iov(dev, head, sg, iov);
	*desc = *desc_table;
	if (*desc == NULL) {
		return -EINVAL;
	}

	return 0;
}
/* Return the scatter-gather slot number `iovcnt` inside the request's sg
 * storage; slots are dma_sg_size() bytes apart.
 */
static inline dma_sg_t *
virtio_req_to_sg_t(struct vfu_virtio_req *req, uint32_t iovcnt)
{
	size_t slot_offset = iovcnt * dma_sg_size();

	return (dma_sg_t *)(req->sg + slot_offset);
}
/* Take a request from the head of the queue's free list and clear its
 * per-I/O fields. Returns NULL when the free list is empty.
 */
static inline struct vfu_virtio_req *
vfu_virtio_dev_get_req(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_vq *vq)
{
	struct vfu_virtio_req *fresh = STAILQ_FIRST(&vq->free_reqs);

	if (fresh != NULL) {
		STAILQ_REMOVE_HEAD(&vq->free_reqs, link);

		fresh->iovcnt = 0;
		fresh->used_len = 0;
		fresh->payload_size = 0;
		fresh->req_idx = 0;
		fresh->buffer_id = 0;
		fresh->num_descs = 0;
	}

	return fresh;
}
/* Give a request back to its queue: release the indirect-table mapping and
 * the data iov mappings it still holds, then push it onto the head of the
 * queue's free list.
 */
void
vfu_virtio_dev_put_req(struct vfu_virtio_req *req)
{
	struct vfu_virtio_endpoint *virtio_endpoint = req->dev->virtio_endpoint;
	vfu_ctx_t *ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint);

	/* Indirect descriptor table, if one was mapped for this request */
	if (req->indirect_iov->iov_base) {
		vfu_sgl_put(ctx, req->indirect_sg, req->indirect_iov, 1);
		req->indirect_iov->iov_base = NULL;
		req->indirect_iov->iov_len = 0;
	}

	/* Data buffers */
	if (req->iovcnt) {
		vfu_sgl_put(ctx, virtio_req_to_sg_t(req, 0), req->iovs, req->iovcnt);
		req->iovcnt = 0;
	}

	STAILQ_INSERT_HEAD(&req->vq->free_reqs, req, link);
}
/* Complete a request: drop the endpoint's outstanding-I/O count, publish the
 * completion on the used ring (packed or split, depending on the negotiated
 * feature) and recycle the request.
 */
void
vfu_virtio_finish_req(struct vfu_virtio_req *req)
{
	struct vfu_virtio_endpoint *virtio_endpoint = req->dev->virtio_endpoint;

	assert(virtio_endpoint->io_outstanding);
	virtio_endpoint->io_outstanding--;

	if (virtio_guest_has_feature(req->dev, VIRTIO_F_RING_PACKED)) {
		virtio_vq_used_ring_packed_enqueue(req->vq, req->buffer_id, req->num_descs, req->used_len);
	} else {
		virtio_vq_used_ring_split_enqueue(req->vq, req->req_idx, req->used_len);
	}

	vfu_virtio_dev_put_req(req);
}
/* Drain the free-request list of every queue of the device, handing each
 * request to vfu_virtio_vq_free_req().
 */
static inline void
vfu_virtio_dev_free_reqs(struct vfu_virtio_endpoint *virtio_endpoint, struct vfu_virtio_dev *dev)
{
	struct vfu_virtio_req *victim;
	struct vfu_virtio_vq *cur;
	uint32_t qid;

	for (qid = 0; qid < dev->num_queues; qid++) {
		cur = &dev->vqs[qid];

		while ((victim = STAILQ_FIRST(&cur->free_reqs)) != NULL) {
			STAILQ_REMOVE_HEAD(&cur->free_reqs, link);
			vfu_virtio_vq_free_req(virtio_endpoint, cur, victim);
		}
	}
}
static int
virtio_dev_split_iovs_setup(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq,
uint16_t desc_idx, struct vfu_virtio_req *req)
{
struct vring_desc *desc, *desc_table;
uint32_t desc_table_size, len = 0;
uint32_t desc_handled_cnt = 0;
int rc;
rc = virtio_split_vring_get_desc(dev, vq, desc_idx, &desc,
&desc_table, &desc_table_size,
req->indirect_sg, req->indirect_iov);
if (spdk_unlikely(rc)) {
SPDK_ERRLOG("Invalid descriptor at index %"PRIu16".\n", desc_idx);
return rc;
}
assert(req->iovcnt == 0);
while (true) {
if (spdk_unlikely(!virtio_vring_desc_to_iov(dev, desc, virtio_req_to_sg_t(req, req->iovcnt),
&req->iovs[req->iovcnt]))) {
return -EINVAL;
}
req->desc_writeable[req->iovcnt] = false;
if (virtio_vring_split_desc_is_wr(desc)) {
req->desc_writeable[req->iovcnt] = true;
}
req->iovcnt++;
len += desc->len;
rc = virtio_vring_split_desc_get_next(&desc, desc_table, desc_table_size);
if (spdk_unlikely(rc)) {
return rc;
} else if (desc == NULL) {
break;
}
desc_handled_cnt++;
if (spdk_unlikely(desc_handled_cnt > desc_table_size)) {
return -EINVAL;
}
}
req->payload_size = len;
return 0;
}
/* Publish one completion on a split used ring: fill the next used element
 * with the head index and written length, then expose it to the driver by
 * storing the new used index after a write barrier.
 */
void
virtio_vq_used_ring_split_enqueue(struct vfu_virtio_vq *vq, uint16_t req_idx, uint32_t used_len)
{
	uint16_t slot = vq->last_used_idx & (vq->qsize - 1);

	SPDK_DEBUGLOG(vfu_virtio_io,
		      "Queue %u - USED RING: last_idx=%"PRIu16" req_idx=%"PRIu16" used_len=%"PRIu32"\n",
		      vq->id, slot, req_idx, used_len);

	vq->used.used->ring[slot].id = req_idx;
	vq->used.used->ring[slot].len = used_len;
	vq->last_used_idx++;

	/* The element must be written before the index that makes it visible */
	spdk_smp_wmb();

	*(volatile uint16_t *)&vq->used.used->idx = vq->last_used_idx;

	vq->used_req_cnt++;
}
/* Publish one completion on a packed ring.
 *
 * The descriptor at vq->last_used_idx is rewritten as a used descriptor
 * carrying `buffer_id` and `used_len`, vq->last_used_idx advances by
 * `num_descs` (wrapping at vq->qsize and toggling the used phase), and
 * vq->used_req_cnt is incremented.
 *
 * If the descriptor already reads as used for the current phase, an error is
 * logged and the function returns without changing any state.
 */
void
virtio_vq_used_ring_packed_enqueue(struct vfu_virtio_vq *vq, uint16_t buffer_id, uint32_t num_descs,
uint32_t used_len)
{
struct vring_packed_desc *desc = &vq->desc.desc_packed[vq->last_used_idx];
SPDK_DEBUGLOG(vfu_virtio_io,
"Queue %u - USED RING: buffer_id=%"PRIu16" num_descs=%u used_len=%"PRIu32"\n",
vq->id, buffer_id, num_descs, used_len);
if (spdk_unlikely(virtio_vring_packed_is_used(desc, vq->packed.used_phase))) {
SPDK_ERRLOG("descriptor has been used before\n");
return;
}
/* In used desc addr is unused and len specifies the buffer length
* that has been written to by the device.
*/
desc->addr = 0;
desc->len = used_len;
/* This bit specifies whether any data has been written by the device */
/* NOTE(review): the flag is only ever OR-ed in here; a WRITE bit already
 * present in the descriptor is kept even when used_len is 0.
 */
if (used_len != 0) {
desc->flags |= VRING_DESC_F_WRITE;
}
/* Buffer ID is included in the last descriptor in the list.
* The driver needs to keep track of the size of the list corresponding
* to each buffer ID.
*/
desc->id = buffer_id;
/* A device MUST NOT make the descriptor used before buffer_id is
* written to the descriptor.
*/
spdk_smp_wmb();
/* To mark a desc as used, the device sets the F_USED bit in flags to match
* the internal Device ring wrap counter. It also sets the F_AVAIL bit to
* match the same value.
*/
if (vq->packed.used_phase) {
desc->flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
desc->flags |= (1 << VRING_PACKED_DESC_F_USED);
} else {
desc->flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
desc->flags &= ~(1 << VRING_PACKED_DESC_F_USED);
}
/* Skip over every descriptor of the completed buffer; crossing the end of
 * the ring wraps the index and flips the used phase.
 */
vq->last_used_idx += num_descs;
if (vq->last_used_idx >= vq->qsize) {
vq->last_used_idx -= vq->qsize;
vq->packed.used_phase = !vq->packed.used_phase;
}
vq->used_req_cnt++;
}
/* Raise the completion interrupt of a queue and clear its pending
 * completion counter.
 *
 * With MSI-X enabled the queue's own vector is triggered. Otherwise, if INTx
 * is enabled, the ISR byte is set and vector 0 is triggered. With both
 * disabled nothing is sent and 0 is returned. In the other cases the result
 * of vfu_irq_trigger() is returned.
 */
static int
vfu_virtio_vq_post_irq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	vfu_ctx_t *ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint);

	vq->used_req_cnt = 0;

	if (spdk_vfu_endpoint_msix_enabled(virtio_endpoint->endpoint)) {
		SPDK_DEBUGLOG(vfu_virtio_io, "%s: Queue %u post MSIX IV %u\n",
			      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
			      vq->id, vq->vector);
		return vfu_irq_trigger(ctx, vq->vector);
	}

	if (!spdk_vfu_endpoint_intx_enabled(virtio_endpoint->endpoint)) {
		SPDK_DEBUGLOG(vfu_virtio_io, "%s: IRQ disabled\n",
			      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint));
		return 0;
	}

	SPDK_DEBUGLOG(vfu_virtio_io, "%s: Queue %u post ISR\n",
		      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), vq->id);
	dev->cfg.isr = 1;
	return vfu_irq_trigger(ctx, 0);
}
/* Send the completion interrupt for a queue if one is due.
 *
 * Nothing is sent when the queue has no unreported completions or when
 * virtio_queue_event_is_suppressed() says the driver does not want one. With
 * coalescing disabled the interrupt is posted right away; otherwise it is
 * posted only once the queue's next_event_time has been reached, and the
 * next deadline is pushed out by the endpoint's coalescing delay.
 */
void
vfu_virtio_vq_flush_irq(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	uint32_t delay_us;

	if (vq->used_req_cnt == 0 || virtio_queue_event_is_suppressed(dev, vq)) {
		return;
	}

	if (!virtio_endpoint->coalescing_delay_us) {
		/* No coalescing: notify immediately */
		vfu_virtio_vq_post_irq(dev, vq);
		return;
	}

	if (spdk_get_ticks() < vq->next_event_time) {
		/* Still inside the current coalescing window */
		return;
	}

	vfu_virtio_vq_post_irq(dev, vq);

	delay_us = virtio_endpoint->coalescing_delay_us;
	vq->next_event_time = spdk_get_ticks() + delay_us * spdk_get_ticks_hz() / (1000000ULL);
}
/* Pull a batch of new requests off a split ring and hand each one to the
 * endpoint's exec_request callback.
 *
 * Returns the number of requests dispatched, 0 when the ring has nothing
 * new, or -EIO when the queue runs out of free request objects. A request
 * whose descriptor chain fails to set up is still dispatched so that the
 * device backend can respond to the error.
 */
int
vfu_virito_dev_process_split_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	uint16_t heads[VIRTIO_DEV_VRING_MAX_REQS];
	struct vfu_virtio_req *req;
	uint16_t nr_heads, k;
	int rc;

	nr_heads = virtio_dev_split_get_avail_reqs(dev, vq, heads, VIRTIO_DEV_VRING_MAX_REQS);
	if (nr_heads == 0) {
		return 0;
	}

	SPDK_DEBUGLOG(vfu_virtio_io, "%s: get %u descriptors\n", dev->name, nr_heads);

	for (k = 0; k < nr_heads; k++) {
		req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
		if (spdk_unlikely(req == NULL)) {
			SPDK_ERRLOG("Error to get request\n");
			/* TODO: address the error case */
			return -EIO;
		}

		req->req_idx = heads[k];
		rc = virtio_dev_split_iovs_setup(dev, vq, req->req_idx, req);
		if (spdk_unlikely(rc != 0)) {
			/* let the device to response this error */
			SPDK_ERRLOG("Split vring setup failed with index %u\n", k);
		}

		assert(virtio_endpoint->virtio_ops.exec_request);
		virtio_endpoint->io_outstanding++;
		virtio_endpoint->virtio_ops.exec_request(virtio_endpoint, vq, req);
	}

	return k;
}
/* Fetch a single new request from a split ring without dispatching it.
 *
 * Returns the prepared request, or NULL when the ring has nothing new, no
 * free request object is available, or the descriptor chain cannot be set
 * up (the request object is recycled in that case).
 */
struct vfu_virtio_req *
virito_dev_split_ring_get_next_avail_req(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	struct vfu_virtio_req *req;
	uint16_t head;
	uint16_t taken;

	/* Only one head is requested, so a single slot is enough */
	taken = virtio_dev_split_get_avail_reqs(dev, vq, &head, 1);
	if (taken == 0) {
		return NULL;
	}
	assert(taken == 1);

	SPDK_DEBUGLOG(vfu_virtio_io, "%s: get 1 descriptors\n", dev->name);

	req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
	if (req == NULL) {
		SPDK_ERRLOG("Error to get request\n");
		return NULL;
	}

	req->req_idx = head;
	if (virtio_dev_split_iovs_setup(dev, vq, req->req_idx, req) != 0) {
		SPDK_ERRLOG("Split vring setup failed\n");
		vfu_virtio_dev_put_req(req);
		return NULL;
	}

	return req;
}
/* Map the buffer described by a packed-ring descriptor read/write into `iov`
 * (tracked by `sg`). Returns whatever spdk_vfu_map_one() returns; callers in
 * this file treat NULL as failure.
 */
static inline void *
virtio_vring_packed_desc_to_iov(struct vfu_virtio_dev *dev, struct vring_packed_desc *desc,
				dma_sg_t *sg, struct iovec *iov)
{
	return spdk_vfu_map_one(dev->virtio_endpoint->endpoint, desc->addr, desc->len,
				sg, iov, PROT_READ | PROT_WRITE);
}
/* Build the iov list of a request from a packed-ring descriptor chain.
 *
 * `current_desc` is the descriptor at `last_avail_idx`. When it is an
 * indirect descriptor, the indirect table is mapped (through the request's
 * indirect_sg/indirect_iov) and its entries are walked; otherwise the chain
 * is followed through the ring itself while VRING_DESC_F_NEXT is set.
 *
 * On success the request holds the mapped iovs, per-iov writeable flags,
 * buffer_id, num_descs and payload_size; vq->last_avail_idx is moved past
 * the consumed ring descriptors and the avail phase is flipped if the index
 * wrapped. Returns 0.
 *
 * Returns -EINVAL when a mapping fails or when the chain needs more than
 * VIRTIO_DEV_MAX_IOVS iovs. On that path vq->last_avail_idx is not advanced,
 * and iovs mapped so far remain counted in req->iovcnt.
 */
static int
virtio_dev_packed_iovs_setup(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq,
uint16_t last_avail_idx,
struct vring_packed_desc *current_desc, struct vfu_virtio_req *req)
{
struct vring_packed_desc *desc, *desc_table = NULL;
uint16_t new_idx, num_descs, desc_table_size = 0;
uint32_t len = 0;
SPDK_DEBUGLOG(vfu_virtio_io, "%s: last avail idx %u, req %p\n", dev->name, last_avail_idx, req);
desc = NULL;
/* An indirect buffer occupies exactly one ring slot, so num_descs stays 1
 * in that case; direct chains increment it per extra descriptor below.
 */
num_descs = 1;
if (virtio_vring_packed_desc_is_indirect(current_desc)) {
req->buffer_id = current_desc->id;
desc_table = virtio_vring_packed_desc_to_iov(dev, current_desc, req->indirect_sg,
req->indirect_iov);
if (spdk_unlikely(desc_table == NULL)) {
SPDK_ERRLOG("Map Indirect Desc to IOV failed\n");
return -EINVAL;
}
desc_table_size = current_desc->len / sizeof(struct vring_packed_desc);
desc = desc_table;
SPDK_DEBUGLOG(vfu_virtio_io, "%s: indirect desc %p, desc size %u, req %p\n",
dev->name, desc_table, desc_table_size, req);
} else {
desc = current_desc;
}
assert(req->iovcnt == 0);
/* Map descs to IOVs */
new_idx = last_avail_idx;
while (1) {
assert(desc != NULL);
if (spdk_unlikely(req->iovcnt == VIRTIO_DEV_MAX_IOVS)) {
SPDK_ERRLOG("Max IOVs in request reached (iovcnt = %d).\n", req->iovcnt);
return -EINVAL;
}
if (spdk_unlikely(!virtio_vring_packed_desc_to_iov(dev, desc, virtio_req_to_sg_t(req, req->iovcnt),
&req->iovs[req->iovcnt]))) {
SPDK_ERRLOG("Map Desc to IOV failed (iovcnt = %d).\n", req->iovcnt);
return -EINVAL;
}
req->desc_writeable[req->iovcnt] = false;
if (virtio_vring_packed_desc_is_wr(desc)) {
req->desc_writeable[req->iovcnt] = true;
}
req->iovcnt++;
len += desc->len;
/* get next desc */
if (desc_table) {
/* Indirect table: entries are consumed in order, so the number of
 * iovs mapped so far is also the index of the next entry.
 */
if (req->iovcnt < desc_table_size) {
desc = &desc_table[req->iovcnt];
} else {
desc = NULL;
}
} else {
if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
/* Last descriptor of the chain carries the buffer id */
req->buffer_id = desc->id;
desc = NULL;
} else {
/* Chain continues in the next ring slot (with wrap-around) */
new_idx = (new_idx + 1) % vq->qsize;
desc = &vq->desc.desc_packed[new_idx];
num_descs++;
req->buffer_id = desc->id;
}
}
if (desc == NULL) {
break;
}
}
req->num_descs = num_descs;
vq->last_avail_idx = (new_idx + 1) % vq->qsize;
/* A new index below the starting index means the ring wrapped */
if (vq->last_avail_idx < last_avail_idx) {
vq->packed.avail_phase = !vq->packed.avail_phase;
}
req->payload_size = len;
SPDK_DEBUGLOG(vfu_virtio_io, "%s: req %p, iovcnt %u, num_descs %u\n",
dev->name, req, req->iovcnt, num_descs);
return 0;
}
/* Pull up to VIRTIO_DEV_VRING_MAX_REQS available requests off a packed ring
 * and hand each one to the endpoint's exec_request callback.
 *
 * Returns the number of requests dispatched, or -EIO when the queue runs out
 * of free request objects. A request whose descriptor chain fails to set up
 * is still dispatched so that the device backend can respond to the error.
 */
int
vfu_virito_dev_process_packed_ring(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	const uint16_t budget = VIRTIO_DEV_VRING_MAX_REQS;
	struct vring_packed_desc *head;
	struct vfu_virtio_req *req;
	uint16_t handled;
	int rc;

	for (handled = 0; handled < budget; handled++) {
		head = &vq->desc.desc_packed[vq->last_avail_idx];
		if (!virtio_vring_packed_is_avail(head, vq->packed.avail_phase)) {
			/* Ring drained */
			break;
		}

		req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
		if (spdk_unlikely(req == NULL)) {
			SPDK_ERRLOG("Error to get request\n");
			/* TODO: address the error case */
			assert(false);
			return -EIO;
		}

		rc = virtio_dev_packed_iovs_setup(dev, vq, vq->last_avail_idx, head, req);
		if (spdk_unlikely(rc != 0)) {
			/* let the device to response the error */
			SPDK_ERRLOG("virtio_dev_packed_iovs_setup failed\n");
		}

		assert(virtio_endpoint->virtio_ops.exec_request);
		virtio_endpoint->io_outstanding++;
		virtio_endpoint->virtio_ops.exec_request(virtio_endpoint, vq, req);
	}

	return handled;
}
/* Fetch a single available request from a packed ring without dispatching
 * it.
 *
 * Returns the prepared request, or NULL when the descriptor at
 * vq->last_avail_idx is not available, no free request object exists, or
 * the descriptor chain cannot be set up (the request object is recycled in
 * that case).
 */
struct vfu_virtio_req *
virito_dev_packed_ring_get_next_avail_req(struct vfu_virtio_dev *dev, struct vfu_virtio_vq *vq)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	struct vring_packed_desc *head = &vq->desc.desc_packed[vq->last_avail_idx];
	struct vfu_virtio_req *req;

	if (!virtio_vring_packed_is_avail(head, vq->packed.avail_phase)) {
		return NULL;
	}

	SPDK_DEBUGLOG(vfu_virtio_io, "%s: get 1 descriptors\n", dev->name);

	req = vfu_virtio_dev_get_req(virtio_endpoint, vq);
	if (req == NULL) {
		SPDK_ERRLOG("Error to get request\n");
		return NULL;
	}

	if (virtio_dev_packed_iovs_setup(dev, vq, vq->last_avail_idx, head, req) != 0) {
		SPDK_ERRLOG("virtio_dev_packed_iovs_setup failed\n");
		vfu_virtio_dev_put_req(req);
		return NULL;
	}

	return req;
}
/* Handle a guest access to the virtio PCI common configuration structure.
 *
 * `pos` is the access position inside the BAR; the register is selected by
 * its offset from VIRTIO_PCI_COMMON_CFG_OFFSET. For writes, up to four bytes
 * from `buf` form the value stored into the selected register; for reads,
 * the register value is copied into `buf`. Per-queue registers act on the
 * queue chosen by the last accepted write to Q_SELECT.
 *
 * Returns `count` on success. Returns the negative error code of the
 * feature/status/queue helper when that helper fails, or -1 with errno set
 * when the register is not supported (EIO) or the access is wider than four
 * bytes (EINVAL).
 */
static int
virtio_vfu_pci_common_cfg(struct vfu_virtio_endpoint *virtio_endpoint, char *buf,
			  size_t count, loff_t pos, bool is_write)
{
	struct vfu_virtio_dev *dev = virtio_endpoint->dev;
	uint32_t offset, value = 0;
	int ret;

	/* `count` comes from the guest access. It bounds the memcpy()s to and
	 * from the 4-byte `value`, so it has to be enforced in release builds
	 * too, not only by an assert.
	 */
	if (spdk_unlikely(count > sizeof(value))) {
		SPDK_ERRLOG("%s: invalid common cfg access size %zu\n",
			    spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), count);
		errno = EINVAL;
		return -1;
	}

	offset = pos - VIRTIO_PCI_COMMON_CFG_OFFSET;

	if (is_write) {
		memcpy(&value, buf, count);
		switch (offset) {
		case VIRTIO_PCI_COMMON_DFSELECT:
			dev->cfg.host_feature_select = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_DFSELECT with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_GFSELECT:
			dev->cfg.guest_feature_select = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GFSELECT with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_GF:
			assert(dev->cfg.guest_feature_select <= 1);
			if (dev->cfg.guest_feature_select) {
				dev->cfg.guest_feat_hi = value;
				SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GF_HI with 0x%x\n",
					      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
					      value);
			} else {
				dev->cfg.guest_feat_lo = value;
				SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_GF_LO with 0x%x\n",
					      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
					      value);
			}

			/* Re-validate the combined 64-bit feature word on every half write */
			ret = virtio_dev_set_features(dev,
						      (((uint64_t)dev->cfg.guest_feat_hi << 32) | dev->cfg.guest_feat_lo));
			if (ret) {
				return ret;
			}
			break;
		case VIRTIO_PCI_COMMON_MSIX:
			dev->cfg.msix_config = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_MSIX with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_STATUS:
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_STATUS with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			ret = virtio_dev_set_status(dev, value);
			if (ret) {
				return ret;
			}
			break;
		case VIRTIO_PCI_COMMON_Q_SELECT:
			/* Out-of-range selections are ignored; the previous one stays */
			if (value < VIRTIO_DEV_MAX_VQS) {
				dev->cfg.queue_select = value;
			}
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_SELECT with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_Q_SIZE:
			dev->vqs[dev->cfg.queue_select].qsize = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_SIZE with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_Q_MSIX:
			dev->vqs[dev->cfg.queue_select].vector = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_MSIX with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_Q_ENABLE:
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE PCI_COMMON_Q_ENABLE with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			/* Only the value 1 enables; any other value disables the queue */
			if (value == 1) {
				ret = virtio_dev_enable_vq(dev, dev->cfg.queue_select);
				if (ret) {
					return ret;
				}
			} else {
				ret = virtio_dev_disable_vq(dev, dev->cfg.queue_select);
				if (ret) {
					return ret;
				}
			}
			break;
		case VIRTIO_PCI_COMMON_Q_DESCLO:
			dev->vqs[dev->cfg.queue_select].desc_lo = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_DESCLO with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_DESCHI:
			dev->vqs[dev->cfg.queue_select].desc_hi = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_DESCHI with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_AVAILLO:
			dev->vqs[dev->cfg.queue_select].avail_lo = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_AVAILLO with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_AVAILHI:
			dev->vqs[dev->cfg.queue_select].avail_hi = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_AVAILHI with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_USEDLO:
			dev->vqs[dev->cfg.queue_select].used_lo = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_USEDLO with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_USEDHI:
			dev->vqs[dev->cfg.queue_select].used_hi = value;
			SPDK_DEBUGLOG(vfu_virtio, "%s: WRITE queue %u PCI_COMMON_Q_USEDHI with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		default:
			SPDK_ERRLOG("%s: WRITE UNSUPPORTED offset 0x%x\n",
				    spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), offset);
			errno = EIO;
			return -1;
		}
	} else {
		switch (offset) {
		case VIRTIO_PCI_COMMON_DFSELECT:
			value = dev->cfg.host_feature_select;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DFSELECT with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_DF:
			assert(dev->cfg.host_feature_select <= 1);
			if (dev->cfg.host_feature_select) {
				value = dev->host_features >> 32;
				SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DF_HI with 0x%x\n",
					      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
					      value);
			} else {
				value = dev->host_features;
				SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_DF_LO with 0x%x\n",
					      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
					      value);
			}
			break;
		case VIRTIO_PCI_COMMON_GFSELECT:
			value = dev->cfg.guest_feature_select;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GFSELECT with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_GF:
			assert(dev->cfg.guest_feature_select <= 1);
			if (dev->cfg.guest_feature_select) {
				value = dev->cfg.guest_feat_hi;
				SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GF_HI with 0x%x\n",
					      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
					      value);
			} else {
				value = dev->cfg.guest_feat_lo;
				SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_GF_LO with 0x%x\n",
					      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
					      value);
			}
			break;
		case VIRTIO_PCI_COMMON_MSIX:
			value = dev->cfg.msix_config;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_MSIX with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_NUMQ:
			value = dev->num_queues;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_NUMQ with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_STATUS:
			value = dev->cfg.device_status;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_STATUS with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_CFGGENERATION:
			value = dev->cfg.config_generation;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_CFGGENERATION with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_Q_NOFF:
			/* The notify offset reported for a queue is its own index */
			value = dev->cfg.queue_select;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_Q_NOFF with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_Q_SELECT:
			value = dev->cfg.queue_select;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ PCI_COMMON_Q_SELECT with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      value);
			break;
		case VIRTIO_PCI_COMMON_Q_SIZE:
			value = dev->vqs[dev->cfg.queue_select].qsize;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_SIZE with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_MSIX:
			value = dev->vqs[dev->cfg.queue_select].vector;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_MSIX with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
				      dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_ENABLE:
			value = dev->vqs[dev->cfg.queue_select].enabled;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_ENABLE with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_DESCLO:
			value = dev->vqs[dev->cfg.queue_select].desc_lo;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_DESCLO with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_DESCHI:
			value = dev->vqs[dev->cfg.queue_select].desc_hi;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_DESCHI with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_AVAILLO:
			value = dev->vqs[dev->cfg.queue_select].avail_lo;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_AVAILLO with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_AVAILHI:
			value = dev->vqs[dev->cfg.queue_select].avail_hi;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_AVAILHI with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_USEDLO:
			value = dev->vqs[dev->cfg.queue_select].used_lo;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_USEDLO with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		case VIRTIO_PCI_COMMON_Q_USEDHI:
			value = dev->vqs[dev->cfg.queue_select].used_hi;
			SPDK_DEBUGLOG(vfu_virtio, "%s: READ queue %u PCI_COMMON_Q_USEDHI with 0x%x\n",
				      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), dev->cfg.queue_select, value);
			break;
		default:
			SPDK_ERRLOG("%s: READ UNSUPPORTED offset 0x%x\n",
				    spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint), offset);
			errno = EIO;
			return -1;
		}
		memcpy(buf, &value, count);
	}

	return count;
}
/* Forward an access to the device-specific configuration area to the
 * endpoint's get_config (read) or set_config (write) callback, passing the
 * offset relative to VIRTIO_PCI_SPECIFIC_CFG_OFFSET.
 *
 * Returns `count` when the callback succeeds, the callback's negative result
 * when it fails, and -1 when the required callback is not provided.
 */
static int
virtio_vfu_device_specific_cfg(struct vfu_virtio_endpoint *virtio_endpoint, char *buf,
			       size_t count, loff_t pos, bool is_write)
{
	loff_t offset;
	int rc = -1;

	assert(count <= 8);
	offset = pos - VIRTIO_PCI_SPECIFIC_CFG_OFFSET;

	if (is_write) {
		if (virtio_endpoint->virtio_ops.set_config) {
			rc = virtio_endpoint->virtio_ops.set_config(virtio_endpoint, buf, offset, count);
		}
	} else {
		if (virtio_endpoint->virtio_ops.get_config) {
			rc = virtio_endpoint->virtio_ops.get_config(virtio_endpoint, buf, offset, count);
		}
	}

	if (rc < 0) {
		return rc;
	}

	return count;
}
/*
 * Handle an access to the virtio ISR status register.
 *
 * Only single-byte reads are accepted.  A read returns the current ISR value
 * in buf and then clears it (read-to-acknowledge).  Any other access sets
 * errno to EIO and returns -1.  Returns count (1) on success.
 */
static int
virtio_vfu_pci_isr(struct vfu_virtio_endpoint *virtio_endpoint, char *buf,
		   size_t count, bool is_write)
{
	uint8_t *isr_out;

	if (count != 1) {
		SPDK_ERRLOG("ISR register is 1 byte\n");
		errno = EIO;
		return -1;
	}

	if (is_write) {
		SPDK_ERRLOG("ISR register is RO\n");
		errno = EIO;
		return -1;
	}

	SPDK_DEBUGLOG(vfu_virtio, "READ PCI ISR\n");
	/* Read-Acknowledge Clear */
	isr_out = (uint8_t *)buf;
	*isr_out = virtio_endpoint->dev->cfg.isr;
	virtio_endpoint->dev->cfg.isr = 0;

	return count;
}
/*
 * BAR4 region access callback: route a guest read/write to the handler of the
 * virtio window (common cfg, ISR, device-specific cfg, notifications) that
 * fully contains [pos, pos + count).
 *
 * Returns the number of bytes handled on success.  On failure returns -1 with
 * errno set, or a negative value from the device-specific config callbacks.
 * An access that does not fit inside a single window is rejected with EINVAL.
 */
static ssize_t
virtio_vfu_access_bar4(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
		       loff_t pos,
		       bool is_write)
{
	struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);
	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
	uint64_t start, end;

	start = pos;
	/* end is exclusive, so the window checks below use <= against offset + length. */
	end = start + count;
	SPDK_DEBUGLOG(vfu_virtio, "%s: %s bar4 0x%"PRIX64"-0x%"PRIX64", len = %zu\n",
		      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
		      is_write ? "write" : "read", start, end - 1, count);

	if (end <= VIRTIO_PCI_COMMON_CFG_OFFSET + VIRTIO_PCI_COMMON_CFG_LENGTH) {
		/* virtio PCI common configuration */
		return virtio_vfu_pci_common_cfg(virtio_endpoint, buf, count, pos, is_write);
	} else if (start >= VIRTIO_PCI_ISR_ACCESS_OFFSET &&
		   end <= VIRTIO_PCI_ISR_ACCESS_OFFSET + VIRTIO_PCI_ISR_ACCESS_LENGTH) {
		/* ISR access */
		return virtio_vfu_pci_isr(virtio_endpoint, buf, count, is_write);
	} else if (start >= VIRTIO_PCI_SPECIFIC_CFG_OFFSET &&
		   end <= VIRTIO_PCI_SPECIFIC_CFG_OFFSET + VIRTIO_PCI_SPECIFIC_CFG_LENGTH) {
		/* Device specific configuration */
		return virtio_vfu_device_specific_cfg(virtio_endpoint, buf, count, pos, is_write);
	} else if (start >= VIRTIO_PCI_NOTIFICATIONS_OFFSET &&
		   end <= VIRTIO_PCI_NOTIFICATIONS_OFFSET + VIRTIO_PCI_NOTIFICATIONS_LENGTH) {
		/* Notifications */
		/* Sparse mmap region by default, there are no MMIO R/W messages */
		assert(false);
		return count;
	}

	/*
	 * The range is controlled by the guest (e.g. an access straddling two
	 * windows), so fail the access instead of asserting.
	 */
	SPDK_ERRLOG("%s: unsupported bar4 %s 0x%"PRIX64"-0x%"PRIX64"\n",
		    spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
		    is_write ? "write" : "read", start, end - 1);
	errno = EINVAL;
	return -1;
}
/*
 * Memory-region-added hook: give every virtqueue of the attached device a
 * chance to (re)map its rings now that more guest memory is available.
 *
 * Mapping failures are ignored here; virtio_dev_map_vq() itself skips queues
 * that are disabled or already active.  Always returns 0.
 */
int
vfu_virtio_post_memory_add(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end)
{
	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
	struct vfu_virtio_dev *dev = virtio_endpoint->dev;
	uint32_t qid;

	if (dev != NULL) {
		/* Try to remap VQs if necessary */
		for (qid = 0; qid < dev->num_queues; qid++) {
			virtio_dev_map_vq(dev, &dev->vqs[qid]);
		}
	}

	return 0;
}
/*
 * Memory-region-about-to-be-removed hook: ask the attached device (if any) to
 * unmap virtqueues with respect to the range [map_start, map_end).
 * Always returns 0.
 */
int
vfu_virtio_pre_memory_remove(struct spdk_vfu_endpoint *endpoint, void *map_start, void *map_end)
{
	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
	struct vfu_virtio_dev *dev = virtio_endpoint->dev;

	if (dev == NULL) {
		return 0;
	}

	vfu_virtio_dev_unmap_vqs(dev, map_start, map_end);
	return 0;
}
/*
 * PCI reset hook: stop the attached device (if any) and then reset its state.
 * Always returns 0.
 */
int
vfu_virtio_pci_reset_cb(struct spdk_vfu_endpoint *endpoint)
{
	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
	struct vfu_virtio_dev *dev = virtio_endpoint->dev;

	if (dev == NULL) {
		return 0;
	}

	vfu_virtio_dev_stop(dev);
	vfu_virtio_dev_reset(dev);
	return 0;
}
/*
 * PCI configuration space region access callback.
 *
 * Only reads are supported: count bytes starting at offset are copied from the
 * endpoint's PCI config buffer into buf.  Writes fail with EINVAL; reads that
 * do not lie entirely inside the 0x1000-byte extended configuration space fail
 * with ERANGE.  Returns count on success, -1 with errno set on failure.
 */
static ssize_t
access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset,
		  bool is_write)
{
	struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);
	void *pci_config = spdk_vfu_endpoint_get_pci_config(endpoint);
	/* Size of the extended PCI configuration space exposed through this region. */
	const size_t cfg_space_len = 0x1000;

	SPDK_DEBUGLOG(vfu_virtio,
		      "%s: PCI_CFG %s %#jx-%#jx\n",
		      spdk_vfu_get_endpoint_id(endpoint), is_write ? "write" : "read",
		      (uintmax_t)offset, (uintmax_t)offset + count);

	if (is_write) {
		SPDK_ERRLOG("write %#jx-%#jx not supported\n",
			    (uintmax_t)offset, (uintmax_t)offset + count);
		errno = EINVAL;
		return -1;
	}

	/*
	 * Written so that neither a negative offset nor a huge count can wrap
	 * the arithmetic and slip past the bounds check.
	 */
	if (offset < 0 || count > cfg_space_len ||
	    (uint64_t)offset > cfg_space_len - count) {
		SPDK_ERRLOG("access past end of extended PCI configuration space, want=%jd+%zu, max=%zu\n",
			    (intmax_t)offset, count, cfg_space_len);
		errno = ERANGE;
		return -1;
	}

	memcpy(buf, ((unsigned char *)pci_config) + offset, count);
	return count;
}
/*
 * Start the virtio device through the backend's start_device callback.
 *
 * Returns -EFAULT if the device is already started, otherwise the callback's
 * return value (0 when the backend provides no start_device callback).
 * The endpoint's outstanding I/O counter is cleared before the callback runs.
 */
static int
vfu_virtio_dev_start(struct vfu_virtio_dev *dev)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	int rc;

	SPDK_DEBUGLOG(vfu_virtio, "start %s\n", dev->name);

	if (virtio_dev_is_started(dev)) {
		SPDK_ERRLOG("Device %s is already started\n", dev->name);
		return -EFAULT;
	}

	if (virtio_endpoint->virtio_ops.start_device != NULL) {
		virtio_endpoint->io_outstanding = 0;
		rc = virtio_endpoint->virtio_ops.start_device(virtio_endpoint);
	} else {
		rc = 0;
	}

	SPDK_DEBUGLOG(vfu_virtio, "%s is started with ret %d\n", dev->name, rc);
	return rc;
}
/*
 * Stop the virtio device: invoke the backend's stop_device callback (when
 * provided) and then unmap all virtqueues.
 *
 * A device that is not started is left untouched and 0 is returned.
 * Otherwise returns the stop_device result, which is asserted to be 0 in
 * debug builds.
 */
static int
vfu_virtio_dev_stop(struct vfu_virtio_dev *dev)
{
	struct vfu_virtio_endpoint *virtio_endpoint = dev->virtio_endpoint;
	int rc = 0;

	SPDK_DEBUGLOG(vfu_virtio, "stop %s\n", dev->name);

	if (!virtio_dev_is_started(dev)) {
		SPDK_DEBUGLOG(vfu_virtio, "%s isn't started\n", dev->name);
		return 0;
	}

	if (virtio_endpoint->virtio_ops.stop_device != NULL) {
		rc = virtio_endpoint->virtio_ops.stop_device(virtio_endpoint);
		assert(rc == 0);
	}

	/* Unmap all VQs */
	vfu_virtio_dev_unmap_vqs(dev, NULL, NULL);

	return rc;
}
/*
 * Detach and destroy the virtio device bound to this endpoint, if any:
 * stop it, release its preallocated requests, clear the endpoint's device
 * pointer and free the device structure.  Always returns 0.
 */
int
vfu_virtio_detach_device(struct spdk_vfu_endpoint *endpoint)
{
	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
	struct vfu_virtio_dev *dev = virtio_endpoint->dev;

	if (dev == NULL) {
		/* Nothing attached. */
		return 0;
	}

	SPDK_DEBUGLOG(vfu_virtio, "detach device %s\n", dev->name);

	vfu_virtio_dev_stop(dev);
	vfu_virtio_dev_free_reqs(virtio_endpoint, dev);

	virtio_endpoint->dev = NULL;
	free(dev);

	return 0;
}
int
vfu_virtio_attach_device(struct spdk_vfu_endpoint *endpoint)
{
struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);
uint64_t supported_features = 0;
struct vfu_virtio_dev *dev;
struct vfu_virtio_vq *vq;
struct vfu_virtio_req *req;
uint32_t i, j;
int ret = 0;
dev = calloc(1, sizeof(*dev) + virtio_endpoint->num_queues * 3 * dma_sg_size());
if (dev == NULL) {
return -ENOMEM;
}
dev->num_queues = virtio_endpoint->num_queues;
for (i = 0; i < dev->num_queues; i++) {
vq = &dev->vqs[i];
vq->id = i;
vq->qsize = virtio_endpoint->qsize;
vq->avail.sg = (dma_sg_t *)(dev->sg + i * dma_sg_size() * 3);
vq->used.sg = (dma_sg_t *)((uint8_t *)vq->avail.sg + dma_sg_size());
vq->desc.sg = (dma_sg_t *)((uint8_t *)vq->used.sg + dma_sg_size());
STAILQ_INIT(&vq->free_reqs);
for (j = 0; j <= vq->qsize; j++) {
req = vfu_virtio_vq_alloc_req(virtio_endpoint, vq);
if (!req) {
SPDK_ERRLOG("Error to allocate req\n");
ret = -ENOMEM;
goto out;
}
req->indirect_iov = &req->iovs[VIRTIO_DEV_MAX_IOVS];
req->indirect_sg = virtio_req_to_sg_t(req, VIRTIO_DEV_MAX_IOVS);
req->dev = dev;
req->vq = vq;
STAILQ_INSERT_TAIL(&vq->free_reqs, req, link);
}
}
if (virtio_endpoint->virtio_ops.get_device_features) {
supported_features = virtio_endpoint->virtio_ops.get_device_features(virtio_endpoint);
}
dev->host_features = supported_features;
snprintf(dev->name, SPDK_VFU_MAX_NAME_LEN, "%s",
spdk_vfu_get_endpoint_name(virtio_endpoint->endpoint));
virtio_endpoint->dev = dev;
dev->virtio_endpoint = virtio_endpoint;
virtio_endpoint->thread = spdk_get_thread();
return 0;
out:
vfu_virtio_dev_free_reqs(virtio_endpoint, dev);
return ret;
}
/*
 * Prepare the common part of a virtio vfio-user endpoint.
 *
 * Creates the BAR4 backing file "<basename><endpoint_name>_bar4", sizes it to
 * VIRTIO_PCI_BAR4_LENGTH, maps the notification (doorbell) window of that
 * file, and records the endpoint, the backend ops and the default queue
 * count/size.
 *
 * Returns 0 on success or a negative errno value on failure.  On failure no
 * file descriptor or mapping is left behind and virtio_endpoint is not
 * modified.
 */
int
vfu_virtio_endpoint_setup(struct vfu_virtio_endpoint *virtio_endpoint,
			  struct spdk_vfu_endpoint *endpoint,
			  char *basename, const char *endpoint_name,
			  struct vfu_virtio_ops *ops)
{
	char path[PATH_MAX] = "";
	void *doorbells;
	int fd, ret, err;

	if (!ops) {
		return -EINVAL;
	}

	ret = snprintf(path, PATH_MAX, "%s%s_bar4", basename, endpoint_name);
	if (ret < 0 || ret >= PATH_MAX) {
		/* Truncation does not set errno, so do not report it. */
		SPDK_ERRLOG("%s: BAR4 backing file path is invalid or too long\n", basename);
		return -EINVAL;
	}

	fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (fd == -1) {
		/* Save errno before logging, which may overwrite it. */
		err = errno;
		SPDK_ERRLOG("%s: failed to open device memory: %s.\n",
			    path, spdk_strerror(err));
		return -err;
	}
	/* The file is only needed through its descriptor from here on. */
	unlink(path);

	ret = ftruncate(fd, VIRTIO_PCI_BAR4_LENGTH);
	if (ret != 0) {
		err = errno;
		SPDK_ERRLOG("%s: error to ftruncate file: %s.\n", path,
			    spdk_strerror(err));
		close(fd);
		return -err;
	}

	doorbells = mmap(NULL, VIRTIO_PCI_NOTIFICATIONS_LENGTH, PROT_READ | PROT_WRITE,
			 MAP_SHARED,
			 fd, VIRTIO_PCI_NOTIFICATIONS_OFFSET);
	if (doorbells == MAP_FAILED) {
		err = errno;
		SPDK_ERRLOG("%s: error to mmap file: %s.\n", path, spdk_strerror(err));
		close(fd);
		return -EFAULT;
	}

	/* Publish the resources only once everything has succeeded. */
	virtio_endpoint->devmem_fd = fd;
	virtio_endpoint->doorbells = doorbells;
	virtio_endpoint->endpoint = endpoint;
	virtio_endpoint->virtio_ops = *ops;
	virtio_endpoint->num_queues = VIRTIO_DEV_MAX_VQS;
	virtio_endpoint->qsize = VIRTIO_VQ_DEFAULT_SIZE;

	SPDK_DEBUGLOG(vfu_virtio, "mmap file %s, devmem_fd %d\n", path, virtio_endpoint->devmem_fd);
	return 0;
}
/*
 * Release the resources acquired by vfu_virtio_endpoint_setup(): the doorbell
 * mapping and the BAR4 backing file descriptor.  Always returns 0.
 *
 * A NULL doorbells pointer and a zero devmem_fd are treated as "not set".
 */
int
vfu_virtio_endpoint_destruct(struct vfu_virtio_endpoint *virtio_endpoint)
{
	if (virtio_endpoint->doorbells != NULL) {
		munmap((void *)virtio_endpoint->doorbells, VIRTIO_PCI_NOTIFICATIONS_LENGTH);
	}

	if (virtio_endpoint->devmem_fd != 0) {
		close(virtio_endpoint->devmem_fd);
	}

	return 0;
}
/*
 * Poller armed by vfu_virtio_quiesce_cb(): waits until the endpoint has no
 * outstanding I/O, then unregisters itself, clears the in-progress flag and
 * reports completion through vfu_device_quiesced().
 *
 * Returns SPDK_POLLER_IDLE while I/O is still outstanding and
 * SPDK_POLLER_BUSY on the iteration that completes the quiesce.
 */
static int
vfu_virtio_quiesce_poll(void *ctx)
{
	struct vfu_virtio_endpoint *virtio_endpoint = ctx;
	vfu_ctx_t *vfu_ctx = spdk_vfu_get_vfu_ctx(virtio_endpoint->endpoint);

	if (virtio_endpoint->io_outstanding != 0) {
		/* Still draining; check again on the next period. */
		return SPDK_POLLER_IDLE;
	}

	spdk_poller_unregister(&virtio_endpoint->quiesce_poller);
	virtio_endpoint->quiesce_in_progress = false;
	vfu_device_quiesced(vfu_ctx, 0);

	return SPDK_POLLER_BUSY;
}
/*
 * Quiesce request hook.
 *
 * Returns 0 when the endpoint has no outstanding I/O and no quiesce is
 * pending.  Otherwise returns -EBUSY; if no quiesce was pending yet, a poller
 * (period argument 10) is registered that completes the quiesce once the
 * outstanding I/O has drained.
 */
int
vfu_virtio_quiesce_cb(struct spdk_vfu_endpoint *endpoint)
{
	struct vfu_virtio_endpoint *virtio_endpoint = spdk_vfu_get_endpoint_private(endpoint);

	if (virtio_endpoint->quiesce_in_progress) {
		/* A previous request is still being drained by the poller. */
		return -EBUSY;
	}

	if (virtio_endpoint->io_outstanding) {
		virtio_endpoint->quiesce_in_progress = true;
		virtio_endpoint->quiesce_poller = SPDK_POLLER_REGISTER(vfu_virtio_quiesce_poll,
						  virtio_endpoint, 10);
		return -EBUSY;
	}

	return 0;
}
/*
 * Template PCI device description shared by the virtio vfio-user devices.
 * vfu_virtio_get_device_info() copies it and then patches in the BAR4 backing
 * file descriptor of the specific endpoint.
 */
static struct spdk_vfu_pci_device vfu_virtio_device_info = {
.id = {
.vid = SPDK_PCI_VID_VIRTIO,
/* Realize when calling get device information */
/* NOTE(review): did/ssid are placeholders here; presumably set by the device-type specific code - confirm. */
.did = 0x0,
.ssvid = SPDK_PCI_VID_VIRTIO,
.ssid = 0x0,
},
.class = {
/* 0x01, mass storage controller */
.bcc = 0x01,
/* 0x00, SCSI controller */
.scc = 0x00,
/* 0x00, SCSI controller - vendor specific interface */
.pi = 0x00,
},
/* Power management capability */
.pmcap = {
.hdr.id = PCI_CAP_ID_PM,
.pmcs.nsfrst = 0x1,
},
/* PCI Express capability */
.pxcap = {
.hdr.id = PCI_CAP_ID_EXP,
.pxcaps.ver = 0x2,
.pxdcap = {.rer = 0x1, .flrc = 0x1},
.pxdcap2.ctds = 0x1,
},
/* MSI-X capability: table in BAR1 (tbir), pending bit array in BAR2 (pbir) */
.msixcap = {
.hdr.id = PCI_CAP_ID_MSIX,
/* NOTE(review): table size is presumably encoded as N - 1, matching nr_msix_irqs below - confirm. */
.mxc.ts = VIRTIO_DEV_MAX_VQS - 1,
.mtab = {.tbir = 0x1, .to = 0x0},
.mpba = {.pbir = 0x2, .pbao = 0x0},
},
/* Matches the four capabilities (idx 0..3) served by vfu_virtio_get_vendor_capability() */
.nr_vendor_caps = 4,
.intr_ipin = 0x1,
.nr_int_irqs = 0x1,
.nr_msix_irqs = VIRTIO_DEV_MAX_VQS,
/*
 * Region table. Entries are positional, so the order below decides which
 * region index each entry describes.
 * NOTE(review): PCI defines only BAR0-BAR5; confirm that the "BAR6" slot does
 * not shift the ROM and PCI Config entries one index past the
 * VFU_PCI_DEV_*_REGION_IDX values they are meant for.
 */
.regions = {
/* BAR0 */
{0},
/* BAR1 */
{
.access_cb = NULL,
.offset = 0,
.fd = -1,
.len = 0x1000,
.flags = VFU_REGION_FLAG_RW,
.nr_sparse_mmaps = 0,
},
/* BAR2 */
{
.access_cb = NULL,
.offset = 0,
.fd = -1,
.len = 0x1000,
.flags = VFU_REGION_FLAG_RW,
.nr_sparse_mmaps = 0,
},
/* BAR3 */
{0},
/* BAR4 */
/* virtio configuration structures; only the notification window is listed as mmap-able, fd is filled in per endpoint */
{
.access_cb = virtio_vfu_access_bar4,
.offset = 0,
.fd = -1,
.len = VIRTIO_PCI_BAR4_LENGTH,
.flags = VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM,
.nr_sparse_mmaps = 1,
.mmaps = {
{
.offset = VIRTIO_PCI_NOTIFICATIONS_OFFSET,
.len = VIRTIO_PCI_NOTIFICATIONS_LENGTH,
},
},
},
/* BAR5 */
{0},
/* BAR6 */
{0},
/* ROM */
{0},
/* PCI Config */
/* Served by access_pci_config(), which only supports reads */
{
.access_cb = access_pci_config,
.offset = 0,
.fd = -1,
.len = 0x1000,
.flags = VFU_REGION_FLAG_RW,
.nr_sparse_mmaps = 0,
},
},
};
/*
 * Fill device_info with the common virtio PCI device template and point the
 * BAR4 region at this endpoint's device memory file descriptor.
 */
void
vfu_virtio_get_device_info(struct vfu_virtio_endpoint *virtio_endpoint,
			   struct spdk_vfu_pci_device *device_info)
{
	const int bar4_fd = virtio_endpoint->devmem_fd;

	memcpy(device_info, &vfu_virtio_device_info, sizeof(vfu_virtio_device_info));

	/* BAR4 Region FD */
	device_info->regions[VFU_PCI_DEV_BAR4_REGION_IDX].fd = bar4_fd;

	SPDK_DEBUGLOG(vfu_virtio, "%s: get device information, fd %d\n",
		      spdk_vfu_get_endpoint_id(virtio_endpoint->endpoint),
		      virtio_endpoint->devmem_fd);
}
static struct virtio_pci_cap common_cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(common_cap),
.cfg_type = VIRTIO_PCI_CAP_COMMON_CFG,
.bar = 4,
.offset = VIRTIO_PCI_COMMON_CFG_OFFSET,
.length = VIRTIO_PCI_COMMON_CFG_LENGTH,
};
static struct virtio_pci_cap isr_cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(isr_cap),
.cfg_type = VIRTIO_PCI_CAP_ISR_CFG,
.bar = 4,
.offset = VIRTIO_PCI_ISR_ACCESS_OFFSET,
.length = VIRTIO_PCI_ISR_ACCESS_LENGTH,
};
static struct virtio_pci_cap dev_cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(dev_cap),
.cfg_type = VIRTIO_PCI_CAP_DEVICE_CFG,
.bar = 4,
.offset = VIRTIO_PCI_SPECIFIC_CFG_OFFSET,
.length = VIRTIO_PCI_SPECIFIC_CFG_LENGTH,
};
static struct virtio_pci_notify_cap notify_cap = {
.cap = {
.cap_vndr = PCI_CAP_ID_VNDR,
.cap_len = sizeof(notify_cap),
.cfg_type = VIRTIO_PCI_CAP_NOTIFY_CFG,
.bar = 4,
.offset = VIRTIO_PCI_NOTIFICATIONS_OFFSET,
.length = VIRTIO_PCI_NOTIFICATIONS_LENGTH,
},
.notify_off_multiplier = 4,
};
/*
 * Copy the idx-th vendor-specific virtio PCI capability into buf.
 *
 * idx 0..3 select the common, ISR, device-specific and notification
 * capabilities respectively.
 *
 * Returns the number of bytes written, or 0 when idx does not name a
 * capability or buf_len is too small to hold it (buf is left untouched in
 * both cases).
 */
uint16_t
vfu_virtio_get_vendor_capability(struct spdk_vfu_endpoint *endpoint, char *buf,
				 uint16_t buf_len,
				 uint16_t idx)
{
	const void *cap;
	size_t cap_len;

	SPDK_DEBUGLOG(vfu_virtio, "%s: get vendor capability, idx %u\n",
		      spdk_vfu_get_endpoint_id(endpoint), idx);

	switch (idx) {
	case 0:
		cap = &common_cap;
		cap_len = sizeof(common_cap);
		break;
	case 1:
		cap = &isr_cap;
		cap_len = sizeof(isr_cap);
		break;
	case 2:
		cap = &dev_cap;
		cap_len = sizeof(dev_cap);
		break;
	case 3:
		cap = &notify_cap;
		cap_len = sizeof(notify_cap);
		break;
	default:
		return 0;
	}

	/* A buffer of exactly cap_len bytes is sufficient; never write past buf_len. */
	if (buf_len < cap_len) {
		SPDK_ERRLOG("%s: buffer of %u bytes is too small for vendor capability %u (%zu bytes)\n",
			    spdk_vfu_get_endpoint_id(endpoint), buf_len, idx, cap_len);
		return 0;
	}

	memcpy(buf, cap, cap_len);
	return (uint16_t)cap_len;
}
/* Log flag used by the SPDK_DEBUGLOG(vfu_virtio, ...) calls in this file. */
SPDK_LOG_REGISTER_COMPONENT(vfu_virtio)
/* NOTE(review): vfu_virtio_io is not referenced in this part of the file; presumably used for per-I/O tracing - confirm. */
SPDK_LOG_REGISTER_COMPONENT(vfu_virtio_io)