Spdk/lib/vhost/rte_vhost_compat.c
Jin Yu f29d20a21e vhost: add a protocol_features parameter in vdev
Add the protocol_features in vdev. There are two features
would be used in vhost device one is the virtio_features
the other is the vhost-user protocol_features. For different
vhost device, the supported features are different so we can
separate them.
Another reason is that I tested the VHOST_USER_PROTOCOL_F_
INFLIGHT_SHMFD in vhost-scsi with QEMU(version:4.0) and found
that Qemu can not boot up. After investigating found that inflight
flag is negotiated but the Qemu doesn't support this feature
and in DPDK function it is handled as an error and disconnect
with Qemu. It's a bug in DPDK and will fix it.

Change-Id: I72e418cb1885bf7dcbd0285d9cec1ad6af0665de
Signed-off-by: Jin Yu <jin.yu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/478814
Community-CI: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: GangCao <gang.cao@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-12-30 11:46:24 +00:00

406 lines
12 KiB
C

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* Set of workarounds for rte_vhost to make it work with device types
* other than vhost-net.
*/
#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "vhost_internal.h"
#include "spdk_internal/vhost_user.h"
#include "spdk_internal/memory.h"
void
vhost_session_mem_register(struct rte_vhost_memory *mem)
{
struct rte_vhost_mem_region *region;
uint32_t i;
uint64_t previous_start = UINT64_MAX;
for (i = 0; i < mem->nregions; i++) {
uint64_t start, end, len;
region = &mem->regions[i];
start = FLOOR_2MB(region->mmap_addr);
end = CEIL_2MB(region->mmap_addr + region->mmap_size);
if (start == previous_start) {
start += (size_t) VALUE_2MB;
}
previous_start = start;
len = end - start;
SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
start, len);
if (spdk_mem_register((void *)start, len) != 0) {
SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
i);
continue;
}
}
}
void
vhost_session_mem_unregister(struct rte_vhost_memory *mem)
{
struct rte_vhost_mem_region *region;
uint32_t i;
uint64_t previous_start = UINT64_MAX;
for (i = 0; i < mem->nregions; i++) {
uint64_t start, end, len;
region = &mem->regions[i];
start = FLOOR_2MB(region->mmap_addr);
end = CEIL_2MB(region->mmap_addr + region->mmap_size);
if (start == previous_start) {
start += (size_t) VALUE_2MB;
}
previous_start = start;
len = end - start;
if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
continue; /* region has not been registered */
}
if (spdk_mem_unregister((void *)start, len) != 0) {
assert(false);
}
}
}
static int
new_connection(int vid)
{
char ifname[PATH_MAX];
if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
return -1;
}
return vhost_new_connection_cb(vid, ifname);
}
static int
start_device(int vid)
{
return vhost_start_device_cb(vid);
}
static void
stop_device(int vid)
{
vhost_stop_device_cb(vid);
}
static void
destroy_connection(int vid)
{
vhost_destroy_connection_cb(vid);
}
static const struct vhost_device_ops g_spdk_vhost_ops = {
.new_device = start_device,
.destroy_device = stop_device,
.new_connection = new_connection,
.destroy_connection = destroy_connection,
#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
.get_config = vhost_get_config_cb,
.set_config = vhost_set_config_cb,
.vhost_nvme_admin_passthrough = vhost_nvme_admin_passthrough,
.vhost_nvme_set_cq_call = vhost_nvme_set_cq_call,
.vhost_nvme_get_cap = vhost_nvme_get_cap,
.vhost_nvme_set_bar_mr = vhost_nvme_set_bar_mr,
#endif
};
#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB
static enum rte_vhost_msg_result
spdk_extern_vhost_pre_msg_handler(int vid, void *_msg)
{
struct vhost_user_msg *msg = _msg;
struct spdk_vhost_session *vsession;
vsession = vhost_session_find_by_vid(vid);
if (vsession == NULL) {
SPDK_ERRLOG("Received a message to unitialized session (vid %d).\n", vid);
assert(false);
return RTE_VHOST_MSG_RESULT_ERR;
}
switch (msg->request) {
case VHOST_USER_GET_VRING_BASE:
if (vsession->forced_polling && vsession->started) {
/* Our queue is stopped for whatever reason, but we may still
* need to poll it after it's initialized again.
*/
g_spdk_vhost_ops.destroy_device(vid);
}
break;
case VHOST_USER_SET_VRING_BASE:
case VHOST_USER_SET_VRING_ADDR:
case VHOST_USER_SET_VRING_NUM:
case VHOST_USER_SET_VRING_KICK:
if (vsession->forced_polling && vsession->started) {
/* Additional queues are being initialized, so we either processed
* enough I/Os and are switching from SeaBIOS to the OS now, or
* we were never in SeaBIOS in the first place. Either way, we
* don't need our workaround anymore.
*/
g_spdk_vhost_ops.destroy_device(vid);
vsession->forced_polling = false;
}
break;
case VHOST_USER_SET_VRING_CALL:
/* rte_vhost will close the previous callfd and won't notify
* us about any change. This will effectively make SPDK fail
* to deliver any subsequent interrupts until a session is
* restarted. We stop the session here before closing the previous
* fd (so that all interrupts must have been delivered by the
* time the descriptor is closed) and start right after (which
* will make SPDK retrieve the latest, up-to-date callfd from
* rte_vhost.
*/
case VHOST_USER_SET_MEM_TABLE:
/* rte_vhost will unmap previous memory that SPDK may still
* have pending DMA operations on. We can't let that happen,
* so stop the device before letting rte_vhost unmap anything.
* This will block until all pending I/Os are finished.
* We will start the device again from the post-processing
* message handler.
*/
if (vsession->started) {
g_spdk_vhost_ops.destroy_device(vid);
vsession->needs_restart = true;
}
break;
case VHOST_USER_GET_CONFIG: {
int rc = 0;
spdk_vhost_lock();
if (vsession->vdev->backend->vhost_get_config) {
rc = vsession->vdev->backend->vhost_get_config(vsession->vdev,
msg->payload.cfg.region, msg->payload.cfg.size);
if (rc != 0) {
msg->size = 0;
}
}
spdk_vhost_unlock();
return RTE_VHOST_MSG_RESULT_REPLY;
}
case VHOST_USER_SET_CONFIG: {
int rc = 0;
spdk_vhost_lock();
if (vsession->vdev->backend->vhost_set_config) {
rc = vsession->vdev->backend->vhost_set_config(vsession->vdev,
msg->payload.cfg.region, msg->payload.cfg.offset,
msg->payload.cfg.size, msg->payload.cfg.flags);
}
spdk_vhost_unlock();
return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
}
default:
break;
}
return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}
static enum rte_vhost_msg_result
spdk_extern_vhost_post_msg_handler(int vid, void *_msg)
{
struct vhost_user_msg *msg = _msg;
struct spdk_vhost_session *vsession;
vsession = vhost_session_find_by_vid(vid);
if (vsession == NULL) {
SPDK_ERRLOG("Received a message to unitialized session (vid %d).\n", vid);
assert(false);
return RTE_VHOST_MSG_RESULT_ERR;
}
if (vsession->needs_restart) {
g_spdk_vhost_ops.new_device(vid);
vsession->needs_restart = false;
return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}
switch (msg->request) {
case VHOST_USER_SET_FEATURES:
/* rte_vhost requires all queues to be fully initialized in order
* to start I/O processing. This behavior is not compliant with the
* vhost-user specification and doesn't work with QEMU 2.12+, which
* will only initialize 1 I/O queue for the SeaBIOS boot.
* Theoretically, we should start polling each virtqueue individually
* after receiving its SET_VRING_KICK message, but rte_vhost is not
* designed to poll individual queues. So here we use a workaround
* to detect when the vhost session could be potentially at that SeaBIOS
* stage and we mark it to start polling as soon as its first virtqueue
* gets initialized. This doesn't hurt any non-QEMU vhost slaves
* and allows QEMU 2.12+ to boot correctly. SET_FEATURES could be sent
* at any time, but QEMU will send it at least once on SeaBIOS
* initialization - whenever powered-up or rebooted.
*/
vsession->forced_polling = true;
break;
case VHOST_USER_SET_VRING_KICK:
/* vhost-user spec tells us to start polling a queue after receiving
* its SET_VRING_KICK message. Let's do it!
*/
if (vsession->forced_polling && !vsession->started) {
g_spdk_vhost_ops.new_device(vid);
}
break;
default:
break;
}
return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}
struct rte_vhost_user_extern_ops g_spdk_extern_vhost_ops = {
.pre_msg_handle = spdk_extern_vhost_pre_msg_handler,
.post_msg_handle = spdk_extern_vhost_post_msg_handler,
};
void
vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
{
int rc;
rc = rte_vhost_extern_callback_register(vsession->vid, &g_spdk_extern_vhost_ops, NULL);
if (rc != 0) {
SPDK_ERRLOG("rte_vhost_extern_callback_register() failed for vid = %d\n",
vsession->vid);
return;
}
}
#else /* SPDK_CONFIG_VHOST_INTERNAL_LIB */
void
vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
{
/* nothing to do. all the changes are already incorporated into rte_vhost */
}
#endif
int
vhost_register_unix_socket(const char *path, const char *ctrl_name,
uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features)
{
struct stat file_stat;
#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB
uint64_t features = 0;
#endif
/* Register vhost driver to handle vhost messages. */
if (stat(path, &file_stat) != -1) {
if (!S_ISSOCK(file_stat.st_mode)) {
SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
"The file already exists and is not a socket.\n",
path);
return -EIO;
} else if (unlink(path) != 0) {
SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
"The socket already exists and failed to unlink.\n",
path);
return -EIO;
}
}
if (rte_vhost_driver_register(path, 0) != 0) {
SPDK_ERRLOG("Could not register controller %s with vhost library\n", ctrl_name);
SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
return -EIO;
}
if (rte_vhost_driver_set_features(path, virtio_features) ||
rte_vhost_driver_disable_features(path, disabled_features)) {
SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", ctrl_name);
rte_vhost_driver_unregister(path);
return -EIO;
}
if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
rte_vhost_driver_unregister(path);
SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", ctrl_name);
return -EIO;
}
#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB
rte_vhost_driver_get_protocol_features(path, &features);
features |= protocol_features;
rte_vhost_driver_set_protocol_features(path, features);
#endif
if (rte_vhost_driver_start(path) != 0) {
SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
ctrl_name, errno, spdk_strerror(errno));
rte_vhost_driver_unregister(path);
return -EIO;
}
return 0;
}
int
vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
{
return rte_vhost_get_mem_table(vid, mem);
}
int
vhost_driver_unregister(const char *path)
{
return rte_vhost_driver_unregister(path);
}
int
vhost_get_negotiated_features(int vid, uint64_t *negotiated_features)
{
return rte_vhost_get_negotiated_features(vid, negotiated_features);
}