vhost/nvme: add shared BAR space to enable old Guest kernel
For some older Linux guest kernels (before 4.12), the NVMe 1.3 shadow doorbell buffer feature is not available. To support them, add a dummy BAR region inside the slave target: when the guest submits a new request, the doorbell value is written to this memory region, which is shared between the guest and the vhost target. With this change the existing vhost target supports both newer Linux guest kernels (4.12 and later) and older ones. The shared BAR space can also be used in the future to move ADMIN queue processing into the SPDK vhost target; with that feature, the QEMU driver becomes very small and easier to upstream.

Change-Id: I9463e9f13421368f43bfe4076facddd119f4552e
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/419157
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
This commit is contained in:
parent 6569a529d6
commit fbc53ae3fb
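For context, the doorbell layout the shared BAR mimics is fixed by the NVMe specification: SQ/CQ doorbell registers start at BAR0 offset 0x1000, and with a doorbell stride of 0 (CAP.DSTRD == 0) each doorbell is one 32-bit dword. A minimal sketch of that addressing (illustrative only, not part of this change):

#include <stdint.h>

/* Doorbell slot inside BAR0, assuming CAP.DSTRD == 0 (4-byte stride).
 * is_cq = 0 selects the SQ tail doorbell, is_cq = 1 the CQ head doorbell. */
static inline volatile uint32_t *
nvme_doorbell(volatile uint8_t *bar0, uint16_t qid, int is_cq)
{
	return (volatile uint32_t *)(bar0 + 0x1000 + 4 * (2 * qid + is_cq));
}

This is why the target code below places bar_db at offset 0x1000 into the BAR and indexes it with sq_offset()/cq_offset().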
@@ -106,6 +106,7 @@ struct vhost_device_ops {
 	int (*features_changed)(int vid, uint64_t features);
 	int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf);
 	int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd);
+	int (*vhost_nvme_set_bar_mr)(int vid, void *bar_addr, uint64_t bar_size);
 	int (*vhost_nvme_get_cap)(int vid, uint64_t *cap);
 
 	int (*new_connection)(int vid);
@@ -200,6 +200,8 @@ struct virtio_net {
 	uint32_t max_guest_pages;
 	struct guest_page *guest_pages;
 	int has_new_mem_table;
+	void *bar_addr;
+	uint64_t bar_size;
 	struct VhostUserMemory mem_table;
 	int mem_table_fds[VHOST_MEMORY_MAX_NREGIONS];
 } __rte_cache_aligned;
@@ -84,7 +84,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
 	[VHOST_USER_NVME_SET_CQ_CALL] = "VHOST_USER_NVME_SET_CQ_CALL",
 	[VHOST_USER_NVME_GET_CAP] = "VHOST_USER_NVME_GET_CAP",
 	[VHOST_USER_NVME_START_STOP] = "VHOST_USER_NVME_START_STOP",
-	[VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD"
+	[VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD",
+	[VHOST_USER_NVME_SET_BAR_MR] = "VHOST_USER_NVME_SET_BAR_MR"
 };
 
 static uint64_t
@@ -139,6 +140,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
 		dev->log_addr = 0;
 	}
+	if (dev->bar_addr) {
+		munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size);
+		dev->bar_addr = NULL;
+		dev->bar_size = 0;
+	}
 }
 
 /*
@@ -1119,6 +1125,90 @@ vhost_user_nvme_get_cap(struct virtio_net *dev, uint64_t *cap)
 	return -1;
 }
 
+static int
+vhost_user_nvme_set_bar_mr(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+{
+	struct VhostUserMemory mem_table;
+	int fd = pmsg->fds[0];
+	void *mmap_addr;
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint64_t alignment;
+	struct rte_vhost_mem_region reg;
+	int ret = 0;
+
+	memcpy(&mem_table, &pmsg->payload.memory, sizeof(mem_table));
+
+	reg.guest_phys_addr = mem_table.regions[0].guest_phys_addr;
+	reg.guest_user_addr = mem_table.regions[0].userspace_addr;
+	reg.size = mem_table.regions[0].memory_size;
+	reg.fd = fd;
+	mmap_offset = mem_table.regions[0].mmap_offset;
+	mmap_size = reg.size + mmap_offset;
+
+	alignment = get_blk_size(fd);
+	if (alignment == (uint64_t)-1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"couldn't get hugepage size through fstat\n");
+		return -1;
+	}
+	mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
+
+	mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+			 MAP_SHARED | MAP_POPULATE, fd, 0);
+
+	if (mmap_addr == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"mmap region failed.\n");
+		return -1;
+	}
+
+	if (madvise(mmap_addr, mmap_size, MADV_DONTDUMP) != 0) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"MADV_DONTDUMP advice setting failed.\n");
+	}
+
+	reg.mmap_addr = mmap_addr;
+	reg.mmap_size = mmap_size;
+	reg.host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
+			     mmap_offset;
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"BAR memory region %u, size: 0x%" PRIx64 "\n"
+		"\t guest physical addr: 0x%" PRIx64 "\n"
+		"\t guest virtual addr: 0x%" PRIx64 "\n"
+		"\t host virtual addr: 0x%" PRIx64 "\n"
+		"\t mmap addr : 0x%" PRIx64 "\n"
+		"\t mmap size : 0x%" PRIx64 "\n"
+		"\t mmap align: 0x%" PRIx64 "\n"
+		"\t mmap off : 0x%" PRIx64 "\n",
+		0, reg.size,
+		reg.guest_phys_addr,
+		reg.guest_user_addr,
+		reg.host_user_addr,
+		(uint64_t)(uintptr_t)mmap_addr,
+		mmap_size,
+		alignment,
+		mmap_offset);
+
+	if (dev->bar_addr) {
+		munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size);
+	}
+	dev->bar_addr = (void *)(uintptr_t)reg.host_user_addr;
+	dev->bar_size = reg.mmap_size;
+
+	if (dev->notify_ops->vhost_nvme_set_bar_mr) {
+		ret = dev->notify_ops->vhost_nvme_set_bar_mr(dev->vid, dev->bar_addr, dev->bar_size);
+		if (ret) {
+			munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size);
+			dev->bar_addr = NULL;
+			dev->bar_size = 0;
+		}
+	}
+
+	return ret;
+}
+
 int
 vhost_user_msg_handler(int vid, int fd)
 {
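The handler above rounds the mapping length up to the hugepage size that get_blk_size() reads via fstat() on the backing fd, since an mmap() of a hugetlbfs fd must cover whole pages. For power-of-two alignments, RTE_ALIGN_CEIL reduces to the usual mask trick; a standalone equivalent (the helper name here is ours, for illustration):

#include <stdint.h>

/* align_ceil(x, a) behaves like DPDK's RTE_ALIGN_CEIL for power-of-two a */
static inline uint64_t
align_ceil(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);
}

/* e.g. a 0x3000-byte BAR region on 2 MiB hugepages still maps 0x200000 bytes:
 * align_ceil(0x3000, 0x200000) == 0x200000 */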
@@ -1243,6 +1333,9 @@ vhost_user_msg_handler(int vid, int fd)
 		is_submission_queue = (msg.payload.nvme_io.queue_type == VHOST_USER_NVME_SUBMISSION_QUEUE) ? true : false;
 		vhost_user_nvme_io_request_passthrough(dev, qid, tail_head, is_submission_queue);
 		break;
+	case VHOST_USER_NVME_SET_BAR_MR:
+		ret = vhost_user_nvme_set_bar_mr(dev, &msg);
+		break;
 	case VHOST_USER_GET_FEATURES:
 		msg.payload.u64 = vhost_user_get_features(dev);
 		msg.size = sizeof(msg.payload.u64);
@@ -91,6 +91,7 @@ typedef enum VhostUserRequest {
 	VHOST_USER_NVME_GET_CAP = 82,
 	VHOST_USER_NVME_START_STOP = 83,
 	VHOST_USER_NVME_IO_CMD = 84,
+	VHOST_USER_NVME_SET_BAR_MR = 85,
 	VHOST_USER_MAX
 } VhostUserRequest;
@@ -82,6 +82,7 @@ const struct vhost_device_ops g_spdk_vhost_ops = {
 	.vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough,
 	.vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call,
 	.vhost_nvme_get_cap = spdk_vhost_nvme_get_cap,
+	.vhost_nvme_set_bar_mr = spdk_vhost_nvme_set_bar_mr,
 };
 
 static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER(
@@ -267,6 +267,7 @@ void spdk_vhost_unlock(void);
 int spdk_remove_vhost_controller(struct spdk_vhost_dev *vdev);
 int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf);
 int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd);
+int spdk_vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size);
 int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap);
 int spdk_vhost_nvme_controller_construct(void);
 int spdk_vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues);
@@ -136,6 +136,11 @@ struct spdk_vhost_nvme_dev {
 	uint32_t num_ns;
 	struct spdk_vhost_nvme_ns ns[MAX_NAMESPACE];
 
+	volatile uint32_t *bar;
+	volatile uint32_t *bar_db;
+	uint64_t bar_size;
+	bool dataplane_started;
+
 	volatile uint32_t *dbbuf_dbs;
 	volatile uint32_t *dbbuf_eis;
 	struct spdk_vhost_nvme_sq sq_queue[MAX_IO_QUEUES + 1];
@@ -224,6 +229,21 @@ spdk_vhost_nvme_get_cq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid)
 	return &dev->cq_queue[qid];
 }
 
+static inline uint32_t
+spdk_vhost_nvme_get_queue_head(struct spdk_vhost_nvme_dev *nvme, uint32_t offset)
+{
+	if (nvme->dataplane_started) {
+		return nvme->dbbuf_dbs[offset];
+
+	} else if (nvme->bar) {
+		return nvme->bar_db[offset];
+	}
+
+	assert(0);
+
+	return 0;
+}
+
 static int
 spdk_nvme_map_prps(struct spdk_vhost_nvme_dev *nvme, struct spdk_nvme_cmd *cmd,
 		   struct spdk_vhost_nvme_task *task, uint32_t len)
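spdk_vhost_nvme_get_queue_head() gives the data path a single read routine for both modes: the shadow doorbell buffer when the guest has enabled it, the shared BAR otherwise. Callers pass a flat dword index; the convention assumed here from the sq_offset()/cq_offset() calls in the surrounding source (doorbell stride of one dword) is:

/* Assumed index convention, matching the sq_offset()/cq_offset() usage in
 * this file: SQ tail of queue qid at slot 2*qid, CQ head at slot 2*qid+1. */
#define sq_offset(qid, db_stride) (2 * (qid) * (db_stride))
#define cq_offset(qid, db_stride) ((2 * (qid) + 1) * (db_stride))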
@@ -309,7 +329,7 @@ spdk_nvme_cq_signal_fd(struct spdk_vhost_nvme_dev *nvme)
 			continue;
 		}
 
-		cq_head = nvme->dbbuf_dbs[cq_offset(qid, 1)];
+		cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(qid, 1));
 		if (cq->irq_enabled && cq->need_signaled_cnt && (cq->cq_head != cq_head)) {
 			eventfd_write(cq->virq, (eventfd_t)1);
 			cq->need_signaled_cnt = 0;
@@ -334,7 +354,7 @@ spdk_vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task)
 		return;
 	}
 
-	cq->guest_signaled_cq_head = nvme->dbbuf_dbs[cq_offset(cqid, 1)];
+	cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(cqid, 1));
 	if (spdk_unlikely(nvme_cq_is_full(cq))) {
 		STAILQ_INSERT_TAIL(&cq->cq_full_waited_tasks, task, stailq);
 		return;
@@ -355,7 +375,9 @@ spdk_vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task)
 	cq->need_signaled_cnt++;
 
 	/* MMIO Controll */
-	nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1);
+	if (nvme->dataplane_started) {
+		nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1);
+	}
 
 	STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq);
 }
@@ -607,10 +629,7 @@ nvme_worker(void *arg)
 		return -1;
 	}
 
-	/* worker thread can't start before the admin doorbell
-	 * buffer config command
-	 */
-	if (spdk_unlikely(!nvme->dbbuf_dbs)) {
+	if (spdk_unlikely(!nvme->dataplane_started && !nvme->bar)) {
 		return -1;
 	}
 
@@ -624,7 +643,7 @@ nvme_worker(void *arg)
 		if (spdk_unlikely(!cq)) {
 			return -1;
 		}
-		cq->guest_signaled_cq_head = nvme->dbbuf_dbs[cq_offset(sq->cqid, 1)];
+		cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(sq->cqid, 1));
 		if (spdk_unlikely(!STAILQ_EMPTY(&cq->cq_full_waited_tasks) &&
 				  !nvme_cq_is_full(cq))) {
 			task = STAILQ_FIRST(&cq->cq_full_waited_tasks);
@@ -632,7 +651,7 @@ nvme_worker(void *arg)
 			spdk_vhost_nvme_task_complete(task);
 		}
 
-		dbbuf_sq = nvme->dbbuf_dbs[sq_offset(qid, 1)];
+		dbbuf_sq = spdk_vhost_nvme_get_queue_head(nvme, sq_offset(qid, 1));
 		sq->sq_tail = (uint16_t)dbbuf_sq;
 		count = 0;
 
@@ -658,7 +677,9 @@ nvme_worker(void *arg)
 			}
 
 			/* MMIO Control */
-			nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1);
+			if (nvme->dataplane_started) {
+				nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1);
+			}
 
 			/* Maximum batch I/Os to pick up at once */
 			if (count++ == MAX_BATCH_IO) {
@@ -697,6 +718,10 @@ vhost_nvme_doorbell_buffer_config(struct spdk_vhost_nvme_dev *nvme,
 
 	cpl->status.sc = 0;
 	cpl->status.sct = 0;
+
+	/* Data plane started */
+	nvme->dataplane_started = true;
+
 	return 0;
 }
 
@@ -744,6 +769,9 @@ vhost_nvme_create_io_sq(struct spdk_vhost_nvme_dev *nvme,
 	}
 	nvme->num_sqs++;
 	sq->valid = true;
+	if (nvme->bar) {
+		nvme->bar_db[sq_offset(qid, 1)] = 0;
+	}
 
 	cpl->status.sc = 0;
 	cpl->status.sct = 0;
@@ -824,6 +852,9 @@ vhost_nvme_create_io_cq(struct spdk_vhost_nvme_dev *nvme,
 	}
 	nvme->num_cqs++;
 	cq->valid = true;
+	if (nvme->bar) {
+		nvme->bar_db[cq_offset(qid, 1)] = 0;
+	}
 	STAILQ_INIT(&cq->cq_full_waited_tasks);
 
 	cpl->status.sc = 0;
@@ -890,7 +921,6 @@ spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
 	struct spdk_vhost_nvme_ns *ns;
 	int ret = 0;
 	struct spdk_vhost_nvme_dev *nvme;
-	uint32_t cq_head, sq_tail;
 
 	nvme = spdk_vhost_nvme_get_by_name(vid);
 	if (!nvme) {
@@ -943,10 +973,6 @@ spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
 		ret = vhost_nvme_doorbell_buffer_config(nvme, req, cpl);
 		break;
 	case SPDK_NVME_OPC_ABORT:
-		sq_tail = nvme->dbbuf_dbs[sq_offset(1, 1)] & 0xffffu;
-		cq_head = nvme->dbbuf_dbs[cq_offset(1, 1)] & 0xffffu;
-		SPDK_NOTICELOG("ABORT: CID %u, SQ_TAIL %u, CQ_HEAD %u\n",
-			       (req->cdw10 >> 16) & 0xffffu, sq_tail, cq_head);
 		/* TODO: ABORT failed fow now */
 		cpl->cdw0 = 1;
 		cpl->status.sc = 0;
@@ -961,6 +987,24 @@ spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
 	return 0;
 }
 
+int
+spdk_vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size)
+{
+	struct spdk_vhost_nvme_dev *nvme;
+
+	nvme = spdk_vhost_nvme_get_by_name(vid);
+	if (!nvme) {
+		return -1;
+	}
+
+	nvme->bar = (volatile uint32_t *)(uintptr_t)(bar_addr);
+	/* BAR0 SQ/CQ doorbell registers start from offset 0x1000 */
+	nvme->bar_db = (volatile uint32_t *)(uintptr_t)(bar_addr + 0x1000ull);
+	nvme->bar_size = bar_size;
+
+	return 0;
+}
+
 int
 spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd)
 {
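One portability note on the expression bar_addr + 0x1000ull above: arithmetic on void * is a GCC extension, not standard C. A strictly conforming spelling of the same computation would be:

/* Equivalent, without void * arithmetic (illustrative) */
nvme->bar_db = (volatile uint32_t *)((uintptr_t)bar_addr + 0x1000);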
@@ -1095,10 +1139,15 @@ destroy_device_poller_cb(void *arg)
 				ns_dev->bdev_io_channel = NULL;
 			}
 		}
+		/* Clear BAR space */
+		if (nvme->bar) {
+			memset((void *)nvme->bar, 0, nvme->bar_size);
+		}
 		nvme->num_sqs = 0;
 		nvme->num_cqs = 0;
 		nvme->dbbuf_dbs = NULL;
 		nvme->dbbuf_eis = NULL;
+		nvme->dataplane_started = false;
 	}
 }
 
@@ -105,6 +105,7 @@ DEFINE_STUB(spdk_vhost_scsi_controller_construct, int, (void), 0);
 DEFINE_STUB(spdk_vhost_blk_controller_construct, int, (void), 0);
 DEFINE_STUB(spdk_vhost_nvme_admin_passthrough, int, (int vid, void *cmd, void *cqe, void *buf), 0);
 DEFINE_STUB(spdk_vhost_nvme_set_cq_call, int, (int vid, uint16_t qid, int fd), 0);
+DEFINE_STUB(spdk_vhost_nvme_set_bar_mr, int, (int vid, void *bar, uint64_t bar_size), 0);
 DEFINE_STUB(spdk_vhost_nvme_get_cap, int, (int vid, uint64_t *cap), 0);
 DEFINE_STUB(spdk_vhost_nvme_controller_construct, int, (void), 0);
 DEFINE_STUB(rte_vhost_set_vhost_vring_last_idx, int,