Spdk/lib/virtio/virtio_vhost_user.c

1044 lines
26 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*/
#include "spdk/stdinc.h"
#include <sys/eventfd.h>
#include "spdk/string.h"
#include "spdk/config.h"
#include "spdk/util.h"
#include "spdk_internal/virtio.h"
#include "spdk_internal/vhost_user.h"
/* The version of the protocol we support */
#define VHOST_USER_VERSION 0x1
/* Protocol feature bits this driver is willing to negotiate. The reply to
 * VHOST_USER_GET_PROTOCOL_FEATURES is masked with this value before it is
 * sent back with VHOST_USER_SET_PROTOCOL_FEATURES.
 */
#define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \
((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
/* Per-device state of the vhost-user transport; stored in virtio_dev::ctx. */
struct virtio_user_dev {
/* Unix socket connected to `path`; -1 until vhost_user_setup() succeeds. */
int vhostfd;
/* Per-queue eventfds passed with VHOST_USER_SET_VRING_CALL; -1 when the queue is not set up. */
int callfds[SPDK_VIRTIO_MAX_VIRTQUEUES];
/* Per-queue eventfds passed with VHOST_USER_SET_VRING_KICK; -1 when the queue is not set up. */
int kickfds[SPDK_VIRTIO_MAX_VIRTQUEUES];
/* Queue size reported for every queue by virtio_user_get_queue_size(). */
uint32_t queue_size;
/* Last device status recorded by virtio_user_set_status(). */
uint8_t status;
/* Path of the vhost-user Unix socket. */
char path[PATH_MAX];
/* Protocol features accepted in virtio_user_set_features(). */
uint64_t protocol_features;
/* Ring size and desc/avail/used addresses of each queue, filled in by virtio_user_setup_queue(). */
struct vring vrings[SPDK_VIRTIO_MAX_VIRTQUEUES];
/* Memory map allocated while the device is started; its notify callback resends the memory table. */
struct spdk_mem_map *mem_map;
};
/*
 * Send `len` bytes starting at `buf` over socket `fd` with one sendmsg()
 * call. When `fds` is non-NULL and `fd_num` is positive, the descriptors are
 * attached to the message as SCM_RIGHTS ancillary data. The call is retried
 * for as long as it fails with EINTR.
 *
 * Returns 0 on success, or a negated errno value if sendmsg() fails.
 */
static int
vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
{
	size_t fds_bytes = fd_num * sizeof(int);
	char ancillary[CMSG_SPACE(fds_bytes)];
	struct iovec data;
	struct msghdr hdr;
	struct cmsghdr *cm;
	int sent;

	memset(&hdr, 0, sizeof(hdr));
	memset(ancillary, 0, sizeof(ancillary));

	data.iov_base = (uint8_t *)buf;
	data.iov_len = len;
	hdr.msg_iov = &data;
	hdr.msg_iovlen = 1;

	/* hdr was zeroed above, so without descriptors the control buffer
	 * stays NULL with a length of 0.
	 */
	if (fds != NULL && fd_num > 0) {
		hdr.msg_control = ancillary;
		hdr.msg_controllen = sizeof(ancillary);

		cm = CMSG_FIRSTHDR(&hdr);
		cm->cmsg_len = CMSG_LEN(fds_bytes);
		cm->cmsg_level = SOL_SOCKET;
		cm->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cm), fds, fds_bytes);
	}

	for (;;) {
		sent = sendmsg(fd, &hdr, 0);
		if (sent >= 0 || errno != EINTR) {
			break;
		}
	}

	return (sent == -1) ? -errno : 0;
}
/*
 * Receive one vhost-user reply from `fd` into `msg`: first the fixed-size
 * header (VHOST_USER_HDR_SIZE bytes), then msg->size bytes of payload.
 *
 * Returns 0 on success; a negated errno if recv() fails; -EBUSY if recv()
 * returns a different byte count than requested; -EIO if the header flags
 * are not exactly (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION) or if the
 * announced payload is larger than VHOST_USER_PAYLOAD_SIZE.
 */
static int
vhost_user_read(int fd, struct vhost_user_msg *msg)
{
	uint32_t expected_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
	size_t hdr_len = VHOST_USER_HDR_SIZE;
	size_t payload_len;
	ssize_t nread;

	nread = recv(fd, (void *)msg, hdr_len, 0);
	if ((size_t)nread != hdr_len) {
		SPDK_WARNLOG("Failed to recv msg hdr: %zd instead of %zu.\n",
			     nread, hdr_len);
		return (nread == -1) ? -errno : -EBUSY;
	}

	/* Only a reply carrying our protocol version is acceptable. */
	if (msg->flags != expected_flags) {
		SPDK_WARNLOG("Failed to recv msg: flags %"PRIx32" instead of %"PRIx32".\n",
			     msg->flags, expected_flags);
		return -EIO;
	}

	payload_len = msg->size;
	if (payload_len > VHOST_USER_PAYLOAD_SIZE) {
		SPDK_WARNLOG("Received oversized msg: payload size %zu > available space %zu\n",
			     payload_len, VHOST_USER_PAYLOAD_SIZE);
		return -EIO;
	}

	if (payload_len == 0) {
		return 0;
	}

	/* The payload is stored directly behind the header inside *msg. */
	nread = recv(fd, (void *)((char *)msg + hdr_len), payload_len, 0);
	if ((size_t)nread != payload_len) {
		SPDK_WARNLOG("Failed to recv msg payload: %zd instead of %"PRIu32".\n",
			     nread, msg->size);
		return (nread == -1) ? -errno : -EBUSY;
	}

	return 0;
}
/* One file-backed mapping found in /proc/self/maps by
 * get_hugepage_file_info(); adjacent mappings of the same file are merged
 * into a single entry.
 */
struct hugepage_file_info {
uint64_t addr; /**< virtual addr */
size_t size; /**< the file size */
char path[PATH_MAX]; /**< path to backing file */
};
/* Two possible options:
* 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
* array. This is simple but cannot be used in secondary process because
* secondary process will close and munmap that file.
* 2. Match HUGEFILE_FMT to find hugepage files directly.
*
* We choose option 2.
*/
/*
 * Scan /proc/self/maps and collect the mappings whose backing path ends in
 * "map_<number>" (the HUGEFILE_FMT pattern). A mapping that directly follows
 * the previous collected one in the address space and has the same path is
 * merged into it.
 *
 * hugepages: output array, filled from index 0.
 * max:       capacity of the output array.
 *
 * Returns the number of entries written, or a negative errno value:
 * -errno if /proc/self/maps cannot be opened, -EIO if a line cannot be
 * parsed, -ENOSPC if more than `max` entries would be needed.
 */
static int
get_hugepage_file_info(struct hugepage_file_info hugepages[], int max)
{
	int idx, rc, field;
	FILE *f;
	char buf[BUFSIZ], *tmp, *tail;
	char *str_underline;
	int huge_index;
	uint64_t v_start, v_end;

	f = fopen("/proc/self/maps", "r");
	if (!f) {
		/* Capture errno before logging, which may overwrite it. */
		rc = -errno;
		SPDK_ERRLOG("cannot open /proc/self/maps\n");
		assert(rc < 0); /* scan-build hack */
		return rc;
	}

	idx = 0;
	while (fgets(buf, sizeof(buf), f) != NULL) {
		if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
			SPDK_ERRLOG("Failed to parse address\n");
			rc = -EIO;
			goto out;
		}

		/* Skip the address, perm, offset, dev and inode fields. Each
		 * one ends with a space; a line missing any of them is
		 * rejected instead of dereferencing a NULL strchr() result.
		 */
		tmp = buf;
		for (field = 0; field < 5; field++) {
			tmp = strchr(tmp, ' ');
			if (tmp == NULL) {
				break;
			}
			tmp++;
		}
		if (tmp == NULL) {
			SPDK_ERRLOG("Failed to parse mapping fields\n");
			rc = -EIO;
			goto out;
		}

		while (*tmp == ' ') { /** skip spaces */
			tmp++;
		}

		tail = strrchr(tmp, '\n'); /** remove newline if exists */
		if (tail) {
			*tail = '\0';
		}

		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
		 * which is defined in eal_filesystem.h
		 */
		str_underline = strrchr(tmp, '_');
		if (!str_underline) {
			continue;
		}

		/* "map" must fit in front of the underscore; test the distance
		 * rather than forming a pointer before the start of the path.
		 */
		if ((size_t)(str_underline - tmp) < strlen("map")) {
			continue;
		}

		if (sscanf(str_underline - strlen("map"), "map_%d", &huge_index) != 1) {
			continue;
		}

		if (idx >= max) {
			SPDK_ERRLOG("Exceed maximum of %d\n", max);
			rc = -ENOSPC;
			goto out;
		}

		/* Extend the previous entry when this mapping continues it. */
		if (idx > 0 &&
		    strncmp(tmp, hugepages[idx - 1].path, PATH_MAX) == 0 &&
		    v_start == hugepages[idx - 1].addr + hugepages[idx - 1].size) {
			hugepages[idx - 1].size += (v_end - v_start);
			continue;
		}

		hugepages[idx].addr = v_start;
		hugepages[idx].size = v_end - v_start;
		snprintf(hugepages[idx].path, PATH_MAX, "%s", tmp);
		idx++;
	}

	rc = idx;
out:
	fclose(f);
	return rc;
}
/*
 * Fill the memory-table payload of `msg` with one region per mapping
 * returned by get_hugepage_file_info() and open the backing file of each
 * region into `fds` (same index as the region).
 *
 * msg: message whose payload.memory is filled in.
 * fds: output array with room for VHOST_USER_MEMORY_MAX_NREGIONS descriptors.
 *      On success the caller owns the first payload.memory.nregions entries
 *      and must close them.
 *
 * Returns 0 on success or a negative errno value. On failure every
 * descriptor opened by this call has already been closed.
 */
static int
prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
{
	int i, num, rc;
	struct hugepage_file_info hugepages[VHOST_USER_MEMORY_MAX_NREGIONS];

	num = get_hugepage_file_info(hugepages, VHOST_USER_MEMORY_MAX_NREGIONS);
	if (num < 0) {
		SPDK_ERRLOG("Failed to prepare memory for vhost-user\n");
		return num;
	}

	for (i = 0; i < num; ++i) {
		fds[i] = open(hugepages[i].path, O_RDWR);
		if (fds[i] < 0) {
			rc = -errno;
			SPDK_ERRLOG("Failed to open %s: %s\n",
				    hugepages[i].path, spdk_strerror(-rc));
			/* Do not leak the descriptors opened so far. */
			while (--i >= 0) {
				close(fds[i]);
			}
			return rc;
		}

		/* the memory regions are unaligned */
		msg->payload.memory.regions[i].guest_phys_addr = hugepages[i].addr; /* use vaddr! */
		msg->payload.memory.regions[i].userspace_addr = hugepages[i].addr;
		msg->payload.memory.regions[i].memory_size = hugepages[i].size;
		msg->payload.memory.regions[i].flags_padding = 0;
	}

	msg->payload.memory.nregions = num;
	msg->payload.memory.padding = 0;
	return 0;
}
/* Printable names of vhost-user requests, indexed by request value and used
 * in log messages. Requests without an entry here are left NULL by the
 * designated initializer.
 */
static const char *const vhost_msg_strings[VHOST_USER_MAX] = {
[VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER",
[VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER",
[VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES",
[VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES",
[VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL",
[VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
[VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
[VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM",
[VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE",
[VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE",
[VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR",
[VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK",
[VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE",
[VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE",
[VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
[VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG",
[VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG",
};
/*
 * Send one vhost-user request over dev->vhostfd and, for requests that have
 * a reply, read the reply and copy its payload back to the caller.
 *
 * dev: device whose socket is used.
 * req: request to send.
 * arg: request-specific argument:
 *      - uint64_t *: SET_FEATURES, SET_LOG_BASE, SET_PROTOCOL_FEATURES (in);
 *        GET_FEATURES, GET_PROTOCOL_FEATURES, GET_QUEUE_NUM (out)
 *      - struct vhost_vring_state *: SET_VRING_NUM/BASE/ENABLE (in),
 *        GET_VRING_BASE (in/out)
 *      - struct vhost_vring_addr *: SET_VRING_ADDR (in)
 *      - struct vhost_vring_file *: SET_VRING_KICK/CALL/ERR (in)
 *      - struct vhost_user_config *: SET_CONFIG (in), GET_CONFIG (in/out)
 *      - int *: SET_LOG_FD (in)
 *      - not read: SET_OWNER, RESET_OWNER, SET_MEM_TABLE
 *
 * Returns 0 on success; -EINVAL for an unsupported request; the error of
 * the underlying send/receive; -EIO if the reply has the wrong type or
 * size; -EBADMSG if a reply arrives for a request that has no reply handler.
 */
static int
vhost_user_sock(struct virtio_user_dev *dev,
		enum vhost_user_request req,
		void *arg)
{
	struct vhost_user_msg msg;
	struct vhost_vring_file *file = NULL;
	int need_reply = 0;
	int fds[VHOST_USER_MEMORY_MAX_NREGIONS];
	int fd_num = 0;
	int i, len, rc;
	int vhostfd = dev->vhostfd;

	SPDK_DEBUGLOG(virtio_user, "sent message %d = %s\n", req, vhost_msg_strings[req]);

	msg.request = req;
	msg.flags = VHOST_USER_VERSION;
	msg.size = 0;

	switch (req) {
	case VHOST_USER_GET_FEATURES:
	case VHOST_USER_GET_PROTOCOL_FEATURES:
	case VHOST_USER_GET_QUEUE_NUM:
		need_reply = 1;
		break;
	case VHOST_USER_SET_FEATURES:
	case VHOST_USER_SET_LOG_BASE:
	case VHOST_USER_SET_PROTOCOL_FEATURES:
		msg.payload.u64 = *((__u64 *)arg);
		msg.size = sizeof(msg.payload.u64);
		break;
	case VHOST_USER_SET_OWNER:
	case VHOST_USER_RESET_OWNER:
		break;
	case VHOST_USER_SET_MEM_TABLE:
		rc = prepare_vhost_memory_user(&msg, fds);
		if (rc < 0) {
			return rc;
		}
		fd_num = msg.payload.memory.nregions;
		msg.size = sizeof(msg.payload.memory.nregions);
		msg.size += sizeof(msg.payload.memory.padding);
		msg.size += fd_num * sizeof(struct vhost_memory_region);
		break;
	case VHOST_USER_SET_LOG_FD:
		fds[fd_num++] = *((int *)arg);
		break;
	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_SET_VRING_ENABLE:
		memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
		msg.size = sizeof(msg.payload.state);
		break;
	case VHOST_USER_GET_VRING_BASE:
		memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
		msg.size = sizeof(msg.payload.state);
		need_reply = 1;
		break;
	case VHOST_USER_SET_VRING_ADDR:
		memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr));
		msg.size = sizeof(msg.payload.addr);
		break;
	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR:
		file = arg;
		msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
		msg.size = sizeof(msg.payload.u64);
		if (file->fd > 0) {
			fds[fd_num++] = file->fd;
		} else {
			/* No descriptor to pass: tell the backend explicitly. */
			msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
		}
		break;
	case VHOST_USER_GET_CONFIG:
		memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg));
		msg.size = sizeof(msg.payload.cfg);
		need_reply = 1;
		break;
	case VHOST_USER_SET_CONFIG:
		memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg));
		msg.size = sizeof(msg.payload.cfg);
		break;
	default:
		SPDK_ERRLOG("trying to send unknown msg\n");
		return -EINVAL;
	}

	len = VHOST_USER_HDR_SIZE + msg.size;
	rc = vhost_user_write(vhostfd, &msg, len, fds, fd_num);

	/* The descriptors of SET_MEM_TABLE were opened only for this message,
	 * so close them whether or not the send succeeded. Descriptors of
	 * other requests belong to the caller and are left open.
	 */
	if (req == VHOST_USER_SET_MEM_TABLE) {
		for (i = 0; i < fd_num; ++i) {
			close(fds[i]);
		}
	}

	if (rc < 0) {
		SPDK_ERRLOG("%s failed: %s\n",
			    vhost_msg_strings[req], spdk_strerror(-rc));
		return rc;
	}

	if (need_reply) {
		rc = vhost_user_read(vhostfd, &msg);
		if (rc < 0) {
			SPDK_WARNLOG("Received msg failed: %s\n", spdk_strerror(-rc));
			return rc;
		}

		if (req != msg.request) {
			SPDK_WARNLOG("Received unexpected msg type\n");
			return -EIO;
		}

		switch (req) {
		case VHOST_USER_GET_FEATURES:
		case VHOST_USER_GET_PROTOCOL_FEATURES:
		case VHOST_USER_GET_QUEUE_NUM:
			if (msg.size != sizeof(msg.payload.u64)) {
				SPDK_WARNLOG("Received bad msg size\n");
				return -EIO;
			}
			*((__u64 *)arg) = msg.payload.u64;
			break;
		case VHOST_USER_GET_VRING_BASE:
			if (msg.size != sizeof(msg.payload.state)) {
				SPDK_WARNLOG("Received bad msg size\n");
				return -EIO;
			}
			memcpy(arg, &msg.payload.state,
			       sizeof(struct vhost_vring_state));
			break;
		case VHOST_USER_GET_CONFIG:
			if (msg.size != sizeof(msg.payload.cfg)) {
				SPDK_WARNLOG("Received bad msg size\n");
				return -EIO;
			}
			memcpy(arg, &msg.payload.cfg, sizeof(msg.payload.cfg));
			break;
		default:
			SPDK_WARNLOG("Received unexpected msg type\n");
			return -EBADMSG;
		}
	}

	return 0;
}
/**
* Set up environment to talk with a vhost user backend.
*
* @return
* - (-1) if fail;
* - (0) if succeed.
*/
static int
vhost_user_setup(struct virtio_user_dev *dev)
{
int fd;
int flag;
struct sockaddr_un un;
ssize_t rc;
fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd < 0) {
SPDK_ERRLOG("socket() error, %s\n", spdk_strerror(errno));
return -errno;
}
flag = fcntl(fd, F_GETFD);
if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) {
SPDK_ERRLOG("fcntl failed, %s\n", spdk_strerror(errno));
}
memset(&un, 0, sizeof(un));
un.sun_family = AF_UNIX;
rc = snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
if (rc < 0 || (size_t)rc >= sizeof(un.sun_path)) {
SPDK_ERRLOG("socket path too long\n");
close(fd);
if (rc < 0) {
return -errno;
} else {
return -EINVAL;
}
}
if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
SPDK_ERRLOG("connect error, %s\n", spdk_strerror(errno));
close(fd);
return -errno;
}
dev->vhostfd = fd;
return 0;
}
static int
virtio_user_create_queue(struct virtio_dev *vdev, uint32_t queue_sel)
{
struct virtio_user_dev *dev = vdev->ctx;
/* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come
* firstly because vhost depends on this msg to allocate virtqueue
* pair.
*/
struct vhost_vring_file file;
file.index = queue_sel;
file.fd = dev->callfds[queue_sel];
return vhost_user_sock(dev, VHOST_USER_SET_VRING_CALL, &file);
}
static int
virtio_user_set_vring_addr(struct virtio_dev *vdev, uint32_t queue_sel)
{
struct virtio_user_dev *dev = vdev->ctx;
struct vring *vring = &dev->vrings[queue_sel];
struct vhost_vring_addr addr = {
.index = queue_sel,
.desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
.avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
.used_user_addr = (uint64_t)(uintptr_t)vring->used,
.log_guest_addr = 0,
.flags = 0, /* disable log */
};
return vhost_user_sock(dev, VHOST_USER_SET_VRING_ADDR, &addr);
}
/*
 * Hand queue `queue_sel` over to the backend: send its size
 * (SET_VRING_NUM), a base index of 0 (SET_VRING_BASE), its ring addresses
 * (SET_VRING_ADDR) and finally its kick eventfd (SET_VRING_KICK).
 *
 * Returns 0 on success or the negative error of the first message that
 * failed; later messages are not sent in that case.
 */
static int
virtio_user_kick_queue(struct virtio_dev *vdev, uint32_t queue_sel)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vhost_vring_file file;
	struct vhost_vring_state state;
	struct vring *vring = &dev->vrings[queue_sel];
	int rc;

	state.index = queue_sel;
	state.num = vring->num;
	rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_NUM, &state);
	if (rc < 0) {
		return rc;
	}

	state.index = queue_sel;
	state.num = 0; /* no reservation */
	rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_BASE, &state);
	if (rc < 0) {
		return rc;
	}

	/* Do not send the kick fd for a ring whose addresses were not
	 * delivered to the backend.
	 */
	rc = virtio_user_set_vring_addr(vdev, queue_sel);
	if (rc < 0) {
		return rc;
	}

	/* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes
	 * lastly because vhost depends on this msg to judge if
	 * virtio is ready.
	 */
	file.index = queue_sel;
	file.fd = dev->kickfds[queue_sel];
	return vhost_user_sock(dev, VHOST_USER_SET_VRING_KICK, &file);
}
static int
virtio_user_stop_queue(struct virtio_dev *vdev, uint32_t queue_sel)
{
struct virtio_user_dev *dev = vdev->ctx;
struct vhost_vring_state state;
state.index = queue_sel;
state.num = 0;
return vhost_user_sock(dev, VHOST_USER_GET_VRING_BASE, &state);
}
/*
 * Call `fn` for every queue index from 0 up to vdev->max_queues, stopping
 * at the first negative result.
 *
 * Returns 0 if every call succeeded, otherwise the failing call's result.
 */
static int
virtio_user_queue_setup(struct virtio_dev *vdev,
			int (*fn)(struct virtio_dev *, uint32_t))
{
	uint32_t queue = 0;

	while (queue < vdev->max_queues) {
		int status = fn(vdev, queue);

		if (status < 0) {
			SPDK_ERRLOG("setup tx vq fails: %"PRIu32".\n", queue);
			return status;
		}
		queue++;
	}

	return 0;
}
/*
 * Memory map notification callback. Whatever region or action is reported,
 * the complete memory table is sent again, followed by a GET_FEATURES
 * round trip that serves as a barrier.
 *
 * Returns the negative error of the first failing message, otherwise the
 * result of the GET_FEATURES exchange.
 */
static int
virtio_user_map_notify(void *cb_ctx, struct spdk_mem_map *map,
		       enum spdk_mem_map_notify_action action,
		       void *vaddr, size_t size)
{
	struct virtio_dev *vdev = cb_ctx;
	struct virtio_user_dev *udev = vdev->ctx;
	uint64_t ignored_features;
	int rc;

	/* We have to resend all mappings anyway, so don't bother with any
	 * page tracking.
	 */
	rc = vhost_user_sock(udev, VHOST_USER_SET_MEM_TABLE, NULL);
	if (rc >= 0) {
		/* Since we might want to use that mapping straight away, we have to
		 * make sure the guest has already processed our SET_MEM_TABLE message.
		 * F_REPLY_ACK is just a feature and the host is not obliged to
		 * support it, so we send a simple message that always has a response
		 * and we wait for that response. Messages are always processed in order.
		 */
		rc = vhost_user_sock(udev, VHOST_USER_GET_FEATURES, &ignored_features);
	}

	return rc;
}
static int
virtio_user_register_mem(struct virtio_dev *vdev)
{
struct virtio_user_dev *dev = vdev->ctx;
const struct spdk_mem_map_ops virtio_user_map_ops = {
.notify_cb = virtio_user_map_notify,
.are_contiguous = NULL
};
dev->mem_map = spdk_mem_map_alloc(0, &virtio_user_map_ops, vdev);
if (dev->mem_map == NULL) {
SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
return -1;
}
return 0;
}
/* Free the memory map stored in the device context of `vdev`. */
static void
virtio_user_unregister_mem(struct virtio_dev *vdev)
{
	struct virtio_user_dev *udev = vdev->ctx;
	struct spdk_mem_map **map_slot = &udev->mem_map;

	spdk_mem_map_free(map_slot);
}
/*
 * Start the device on the backend side: clamp the queue count to what the
 * backend supports, announce every queue (call fds), register the memory
 * map and finally hand over every queue (ring layout and kick fds).
 *
 * Returns 0 on success or a negative error value. If handing over the
 * queues fails, the memory map registered by this call is freed again.
 */
static int
virtio_user_start_device(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;
	uint64_t host_max_queues;
	int ret;

	if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) == 0 &&
	    vdev->max_queues > 1 + vdev->fixed_queues_num) {
		SPDK_WARNLOG("%s: requested %"PRIu16" request queues, but the "
			     "host doesn't support VHOST_USER_PROTOCOL_F_MQ. "
			     "Only one request queue will be used.\n",
			     vdev->name, vdev->max_queues - vdev->fixed_queues_num);
		vdev->max_queues = 1 + vdev->fixed_queues_num;
	}

	/* negotiate the number of I/O queues. */
	ret = vhost_user_sock(dev, VHOST_USER_GET_QUEUE_NUM, &host_max_queues);
	if (ret < 0) {
		return ret;
	}

	if (vdev->max_queues > host_max_queues + vdev->fixed_queues_num) {
		SPDK_WARNLOG("%s: requested %"PRIu16" request queues "
			     "but only %"PRIu64" available\n",
			     vdev->name, vdev->max_queues - vdev->fixed_queues_num,
			     host_max_queues);
		/* NOTE(review): the comparison above adds fixed_queues_num to
		 * host_max_queues, but the new limit does not; confirm whether
		 * the reported number already includes the fixed queues.
		 */
		vdev->max_queues = host_max_queues;
	}

	/* tell vhost to create queues */
	ret = virtio_user_queue_setup(vdev, virtio_user_create_queue);
	if (ret < 0) {
		return ret;
	}

	ret = virtio_user_register_mem(vdev);
	if (ret < 0) {
		return ret;
	}

	ret = virtio_user_queue_setup(vdev, virtio_user_kick_queue);
	if (ret < 0) {
		/* The start failed, so virtio_user_set_status() will not record
		 * DRIVER_OK and virtio_user_stop_device() will not run on reset.
		 * Free the map here, otherwise it is never freed.
		 */
		virtio_user_unregister_mem(vdev);
	}

	return ret;
}
/*
 * Stop every queue and then free the memory map.
 *
 * Returns the result of stopping the queues; the memory map is freed
 * regardless of that result.
 */
static int
virtio_user_stop_device(struct virtio_dev *vdev)
{
	const int stop_rc = virtio_user_queue_setup(vdev, virtio_user_stop_queue);

	/* a queue might fail to stop for various reasons, e.g. socket
	 * connection going down, but this mustn't prevent us from freeing
	 * the mem map.
	 */
	virtio_user_unregister_mem(vdev);

	return stop_rc;
}
/*
 * Mark the socket and all per-queue eventfds as unset (-1), then connect
 * to the backend. Returns the result of vhost_user_setup().
 */
static int
virtio_user_dev_setup(struct virtio_dev *vdev)
{
	struct virtio_user_dev *udev = vdev->ctx;
	uint16_t slot = 0;

	udev->vhostfd = -1;

	while (slot < SPDK_VIRTIO_MAX_VIRTQUEUES) {
		udev->kickfds[slot] = -1;
		udev->callfds[slot] = -1;
		++slot;
	}

	return vhost_user_setup(udev);
}
/*
 * Read `length` bytes of device configuration starting at `offset` into
 * `dst`. The whole configuration space (VHOST_USER_MAX_CONFIG_SIZE bytes
 * from offset 0) is fetched with VHOST_USER_GET_CONFIG and the requested
 * window is copied out of it.
 *
 * Returns 0 on success; -ENOTSUP if VHOST_USER_PROTOCOL_F_CONFIG was not
 * negotiated; -EINVAL if the requested window does not lie within
 * VHOST_USER_MAX_CONFIG_SIZE bytes; or the error of vhost_user_sock().
 */
static int
virtio_user_read_dev_config(struct virtio_dev *vdev, size_t offset,
			    void *dst, int length)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vhost_user_config cfg = {0};
	int rc;

	if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) {
		return -ENOTSUP;
	}

	/* Never copy from beyond the VHOST_USER_MAX_CONFIG_SIZE bytes requested below. */
	if (length < 0 || offset > VHOST_USER_MAX_CONFIG_SIZE ||
	    (size_t)length > VHOST_USER_MAX_CONFIG_SIZE - offset) {
		SPDK_ERRLOG("invalid config read: offset %zu, length %d\n", offset, length);
		return -EINVAL;
	}

	cfg.offset = 0;
	cfg.size = VHOST_USER_MAX_CONFIG_SIZE;
	rc = vhost_user_sock(dev, VHOST_USER_GET_CONFIG, &cfg);
	if (rc < 0) {
		SPDK_ERRLOG("get_config failed: %s\n", spdk_strerror(-rc));
		return rc;
	}

	memcpy(dst, cfg.region + offset, length);
	return 0;
}
static int
virtio_user_write_dev_config(struct virtio_dev *vdev, size_t offset,
const void *src, int length)
{
struct virtio_user_dev *dev = vdev->ctx;
struct vhost_user_config cfg = {0};
int rc;
if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) {
return -ENOTSUP;
}
cfg.offset = offset;
cfg.size = length;
memcpy(cfg.region, src, length);
rc = vhost_user_sock(dev, VHOST_USER_SET_CONFIG, &cfg);
if (rc < 0) {
SPDK_ERRLOG("set_config failed: %s\n", spdk_strerror(-rc));
return rc;
}
return 0;
}
/*
 * Apply a new device status.
 *
 * - While NEEDS_RESET is set, anything other than a reset is refused.
 * - A status containing DRIVER_OK starts the device.
 * - A reset of a device that has DRIVER_OK set stops the device.
 *
 * If the transition is refused or fails, NEEDS_RESET is added to the stored
 * status; otherwise the stored status is replaced by `status`.
 */
static void
virtio_user_set_status(struct virtio_dev *vdev, uint8_t status)
{
	struct virtio_user_dev *udev = vdev->ctx;
	int failed = 0;

	if ((udev->status & VIRTIO_CONFIG_S_NEEDS_RESET) &&
	    status != VIRTIO_CONFIG_S_RESET) {
		failed = 1;
	} else if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
		failed = (virtio_user_start_device(vdev) != 0);
	} else if (status == VIRTIO_CONFIG_S_RESET &&
		   (udev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
		failed = (virtio_user_stop_device(vdev) != 0);
	}

	if (failed) {
		udev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
		return;
	}

	udev->status = status;
}
static uint8_t
virtio_user_get_status(struct virtio_dev *vdev)
{
struct virtio_user_dev *dev = vdev->ctx;
return dev->status;
}
/*
 * Query the backend's feature bits with VHOST_USER_GET_FEATURES.
 *
 * Returns the feature mask, or 0 if the request fails (the failure is
 * logged).
 */
static uint64_t
virtio_user_get_features(struct virtio_dev *vdev)
{
	uint64_t host_features;
	int rc = vhost_user_sock(vdev->ctx, VHOST_USER_GET_FEATURES, &host_features);

	if (rc >= 0) {
		return host_features;
	}

	SPDK_ERRLOG("get_features failed: %s\n", spdk_strerror(-rc));
	return 0;
}
/*
 * Send the chosen feature bits to the backend and record them in `vdev`.
 * If VHOST_USER_F_PROTOCOL_FEATURES is among them, also negotiate protocol
 * features: fetch the backend's set, keep only the bits in
 * VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES, send the result back and store it.
 *
 * Returns 0 on success or the negative error of the failing message.
 */
static int
virtio_user_set_features(struct virtio_dev *vdev, uint64_t features)
{
	struct virtio_user_dev *udev = vdev->ctx;
	uint64_t proto;
	int rc;

	rc = vhost_user_sock(udev, VHOST_USER_SET_FEATURES, &features);
	if (rc < 0) {
		return rc;
	}

	vdev->negotiated_features = features;
	vdev->modern = virtio_dev_has_feature(vdev, VIRTIO_F_VERSION_1);

	if (virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) {
		rc = vhost_user_sock(udev, VHOST_USER_GET_PROTOCOL_FEATURES, &proto);
		if (rc < 0) {
			return rc;
		}

		proto &= VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES;

		rc = vhost_user_sock(udev, VHOST_USER_SET_PROTOCOL_FEATURES, &proto);
		if (rc < 0) {
			return rc;
		}

		udev->protocol_features = proto;
	}

	/* Without protocol features there is nothing else to do. */
	return 0;
}
static uint16_t
virtio_user_get_queue_size(struct virtio_dev *vdev, uint16_t queue_id)
{
struct virtio_user_dev *dev = vdev->ctx;
/* Currently each queue has same queue size */
return dev->queue_size;
}
static int
virtio_user_setup_queue(struct virtio_dev *vdev, struct virtqueue *vq)
{
struct virtio_user_dev *dev = vdev->ctx;
struct vhost_vring_state state;
uint16_t queue_idx = vq->vq_queue_index;
void *queue_mem;
uint64_t desc_addr, avail_addr, used_addr;
int callfd, kickfd, rc;
if (dev->callfds[queue_idx] != -1 || dev->kickfds[queue_idx] != -1) {
SPDK_ERRLOG("queue %"PRIu16" already exists\n", queue_idx);
return -EEXIST;
}
/* May use invalid flag, but some backend uses kickfd and
* callfd as criteria to judge if dev is alive. so finally we
* use real event_fd.
*/
callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
if (callfd < 0) {
SPDK_ERRLOG("callfd error, %s\n", spdk_strerror(errno));
return -errno;
}
kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
if (kickfd < 0) {
SPDK_ERRLOG("kickfd error, %s\n", spdk_strerror(errno));
close(callfd);
return -errno;
}
queue_mem = spdk_zmalloc(vq->vq_ring_size, VIRTIO_PCI_VRING_ALIGN, NULL,
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
if (queue_mem == NULL) {
close(kickfd);
close(callfd);
return -ENOMEM;
}
vq->vq_ring_mem = SPDK_VTOPHYS_ERROR;
vq->vq_ring_virt_mem = queue_mem;
state.index = vq->vq_queue_index;
state.num = vq->vq_nentries;
if (virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) {
rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state);
if (rc < 0) {
SPDK_ERRLOG("failed to send VHOST_USER_SET_VRING_ENABLE: %s\n",
spdk_strerror(-rc));
close(kickfd);
close(callfd);
spdk_free(queue_mem);
return -rc;
}
}
dev->callfds[queue_idx] = callfd;
dev->kickfds[queue_idx] = kickfd;
desc_addr = (uintptr_t)vq->vq_ring_virt_mem;
avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
used_addr = SPDK_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
ring[vq->vq_nentries]),
VIRTIO_PCI_VRING_ALIGN);
dev->vrings[queue_idx].num = vq->vq_nentries;
dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr;
dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr;
dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr;
return 0;
}
/*
 * Release the local resources of virtqueue `vq`: close its call and kick
 * eventfds, mark both slots unused (-1) and free the ring memory.
 */
static void
virtio_user_del_queue(struct virtio_dev *vdev, struct virtqueue *vq)
{
	/* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU
	 * correspondingly stops the ioeventfds, and reset the status of
	 * the device.
	 * For modern devices, set queue desc, avail, used in PCI bar to 0,
	 * not see any more behavior in QEMU.
	 *
	 * Here we just care about what information to deliver to vhost-user.
	 * So we just close ioeventfd for now.
	 */
	struct virtio_user_dev *udev = vdev->ctx;
	int *call_slot = &udev->callfds[vq->vq_queue_index];
	int *kick_slot = &udev->kickfds[vq->vq_queue_index];

	close(*call_slot);
	close(*kick_slot);
	*call_slot = -1;
	*kick_slot = -1;

	spdk_free(vq->vq_ring_virt_mem);
}
/*
 * Notify the backend about queue `vq` by writing the value 1 to the
 * queue's kick eventfd. A failed write is logged and otherwise ignored.
 */
static void
virtio_user_notify_queue(struct virtio_dev *vdev, struct virtqueue *vq)
{
	struct virtio_user_dev *udev = vdev->ctx;
	uint64_t increment = 1;
	ssize_t written;

	written = write(udev->kickfds[vq->vq_queue_index], &increment, sizeof(increment));
	if (written < 0) {
		SPDK_ERRLOG("failed to kick backend: %s.\n", spdk_strerror(errno));
	}
}
/* Close the backend socket and free the transport context of `vdev`. */
static void
virtio_user_destroy(struct virtio_dev *vdev)
{
	struct virtio_user_dev *udev = vdev->ctx;
	int sock = udev->vhostfd;

	close(sock);
	free(udev);
}
static void
virtio_user_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w)
{
struct virtio_user_dev *dev = vdev->ctx;
spdk_json_write_named_string(w, "type", "user");
spdk_json_write_named_string(w, "socket", dev->path);
}
static void
virtio_user_write_json_config(struct virtio_dev *vdev, struct spdk_json_write_ctx *w)
{
struct virtio_user_dev *dev = vdev->ctx;
spdk_json_write_named_string(w, "trtype", "user");
spdk_json_write_named_string(w, "traddr", dev->path);
spdk_json_write_named_uint32(w, "vq_count", vdev->max_queues - vdev->fixed_queues_num);
spdk_json_write_named_uint32(w, "vq_size", virtio_dev_backend_ops(vdev)->get_queue_size(vdev, 0));
}
/* Backend operations of the vhost-user transport; passed to
 * virtio_dev_construct() by virtio_user_dev_init().
 */
static const struct virtio_dev_ops virtio_user_ops = {
.read_dev_cfg = virtio_user_read_dev_config,
.write_dev_cfg = virtio_user_write_dev_config,
.get_status = virtio_user_get_status,
.set_status = virtio_user_set_status,
.get_features = virtio_user_get_features,
.set_features = virtio_user_set_features,
.destruct_dev = virtio_user_destroy,
.get_queue_size = virtio_user_get_queue_size,
.setup_queue = virtio_user_setup_queue,
.del_queue = virtio_user_del_queue,
.notify_queue = virtio_user_notify_queue,
.dump_json_info = virtio_user_dump_json_info,
.write_json_config = virtio_user_write_json_config,
};
/*
 * Initialize `vdev` as a vhost-user device: allocate the transport context,
 * construct the virtio device with the vhost-user backend ops, connect to
 * the socket at `path` and claim the backend with VHOST_USER_SET_OWNER.
 *
 * vdev:       device to initialize.
 * name:       controller name; must not be NULL.
 * path:       path of the vhost-user Unix socket; must not be NULL.
 * queue_size: size reported for every queue of this device.
 *
 * Returns 0 on success; -EINVAL if `name` or `path` is NULL; -ENOMEM if the
 * context cannot be allocated; otherwise the error of the failing step. If
 * a step fails after the device was constructed, it is destructed again.
 */
int
virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
		     uint32_t queue_size)
{
	struct virtio_user_dev *dev;
	int rc;

	/* path is formatted with "%s" below, so it must not be NULL. */
	if (path == NULL) {
		SPDK_ERRLOG("No socket path given for controller\n");
		return -EINVAL;
	}

	if (name == NULL) {
		SPDK_ERRLOG("No name given for controller: %s\n", path);
		return -EINVAL;
	}

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -ENOMEM;
	}

	rc = virtio_dev_construct(vdev, name, &virtio_user_ops, dev);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to init device: %s\n", path);
		free(dev);
		return rc;
	}

	vdev->is_hw = 0;

	snprintf(dev->path, PATH_MAX, "%s", path);
	dev->queue_size = queue_size;

	rc = virtio_user_dev_setup(vdev);
	if (rc < 0) {
		SPDK_ERRLOG("backend set up fails\n");
		goto err;
	}

	rc = vhost_user_sock(dev, VHOST_USER_SET_OWNER, NULL);
	if (rc < 0) {
		SPDK_ERRLOG("set_owner fails: %s\n", spdk_strerror(-rc));
		goto err;
	}

	return 0;

err:
	virtio_dev_destruct(vdev);
	return rc;
}
/* Register the "virtio_user" log component named in this file's SPDK_DEBUGLOG() call. */
SPDK_LOG_REGISTER_COMPONENT(virtio_user)