diff --git a/lib/vhost/rte_vhost/rte_vhost.h b/lib/vhost/rte_vhost/rte_vhost.h index 29f5b6130..17b2c67b4 100644 --- a/lib/vhost/rte_vhost/rte_vhost.h +++ b/lib/vhost/rte_vhost/rte_vhost.h @@ -106,6 +106,7 @@ struct vhost_device_ops { int (*features_changed)(int vid, uint64_t features); int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf); int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd); + int (*vhost_nvme_set_bar_mr)(int vid, void *bar_addr, uint64_t bar_size); int (*vhost_nvme_get_cap)(int vid, uint64_t *cap); int (*new_connection)(int vid); diff --git a/lib/vhost/rte_vhost/vhost.h b/lib/vhost/rte_vhost/vhost.h index b0a0201d3..db20d9bde 100644 --- a/lib/vhost/rte_vhost/vhost.h +++ b/lib/vhost/rte_vhost/vhost.h @@ -200,6 +200,8 @@ struct virtio_net { uint32_t max_guest_pages; struct guest_page *guest_pages; int has_new_mem_table; + void *bar_addr; + uint64_t bar_size; struct VhostUserMemory mem_table; int mem_table_fds[VHOST_MEMORY_MAX_NREGIONS]; } __rte_cache_aligned; diff --git a/lib/vhost/rte_vhost/vhost_user.c b/lib/vhost/rte_vhost/vhost_user.c index b708a8a7b..0c6431b9b 100644 --- a/lib/vhost/rte_vhost/vhost_user.c +++ b/lib/vhost/rte_vhost/vhost_user.c @@ -84,7 +84,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NVME_SET_CQ_CALL] = "VHOST_USER_NVME_SET_CQ_CALL", [VHOST_USER_NVME_GET_CAP] = "VHOST_USER_NVME_GET_CAP", [VHOST_USER_NVME_START_STOP] = "VHOST_USER_NVME_START_STOP", - [VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD" + [VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD", + [VHOST_USER_NVME_SET_BAR_MR] = "VHOST_USER_NVME_SET_BAR_MR" }; static uint64_t @@ -139,6 +140,11 @@ vhost_backend_cleanup(struct virtio_net *dev) munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); dev->log_addr = 0; } + if (dev->bar_addr) { + munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size); + dev->bar_addr = NULL; + dev->bar_size = 0; + } } /* @@ -1119,6 +1125,90 @@ vhost_user_nvme_get_cap(struct virtio_net *dev, uint64_t *cap) return -1; } +static int +vhost_user_nvme_set_bar_mr(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct VhostUserMemory mem_table; + int fd = pmsg->fds[0]; + void *mmap_addr; + uint64_t mmap_size; + uint64_t mmap_offset; + uint64_t alignment; + struct rte_vhost_mem_region reg; + int ret = 0; + + memcpy(&mem_table, &pmsg->payload.memory, sizeof(mem_table)); + + reg.guest_phys_addr = mem_table.regions[0].guest_phys_addr; + reg.guest_user_addr = mem_table.regions[0].userspace_addr; + reg.size = mem_table.regions[0].memory_size; + reg.fd = fd; + mmap_offset = mem_table.regions[0].mmap_offset; + mmap_size = reg.size + mmap_offset; + + alignment = get_blk_size(fd); + if (alignment == (uint64_t)-1) { + RTE_LOG(ERR, VHOST_CONFIG, + "couldn't get hugepage size through fstat\n"); + return -1; + } + mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); + + mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + + if (mmap_addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap region failed.\n"); + return -1; + } + + if (madvise(mmap_addr, mmap_size, MADV_DONTDUMP) != 0) { + RTE_LOG(INFO, VHOST_CONFIG, + "MADV_DONTDUMP advice setting failed.\n"); + } + + reg.mmap_addr = mmap_addr; + reg.mmap_size = mmap_size; + reg.host_user_addr = (uint64_t)(uintptr_t)mmap_addr + + mmap_offset; + + RTE_LOG(INFO, VHOST_CONFIG, + "BAR memory region %u, size: 0x%" PRIx64 "\n" + "\t guest physical addr: 0x%" PRIx64 "\n" + "\t guest virtual addr: 0x%" PRIx64 "\n" + "\t host virtual addr: 0x%" PRIx64 "\n" + "\t mmap addr : 0x%" PRIx64 "\n" + "\t mmap size : 0x%" PRIx64 "\n" + "\t mmap align: 0x%" PRIx64 "\n" + "\t mmap off : 0x%" PRIx64 "\n", + 0, reg.size, + reg.guest_phys_addr, + reg.guest_user_addr, + reg.host_user_addr, + (uint64_t)(uintptr_t)mmap_addr, + mmap_size, + alignment, + mmap_offset); + + if (dev->bar_addr) { + munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size); + } + dev->bar_addr = (void *)(uintptr_t)reg.host_user_addr; + dev->bar_size = reg.mmap_size; + + if (dev->notify_ops->vhost_nvme_set_bar_mr) { + ret = dev->notify_ops->vhost_nvme_set_bar_mr(dev->vid, dev->bar_addr, dev->bar_size); + if (ret) { + munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size); + dev->bar_addr = NULL; + dev->bar_size = 0; + } + } + + return ret; +} + int vhost_user_msg_handler(int vid, int fd) { @@ -1243,6 +1333,9 @@ vhost_user_msg_handler(int vid, int fd) is_submission_queue = (msg.payload.nvme_io.queue_type == VHOST_USER_NVME_SUBMISSION_QUEUE) ? true : false; vhost_user_nvme_io_request_passthrough(dev, qid, tail_head, is_submission_queue); break; + case VHOST_USER_NVME_SET_BAR_MR: + ret = vhost_user_nvme_set_bar_mr(dev, &msg); + break; case VHOST_USER_GET_FEATURES: msg.payload.u64 = vhost_user_get_features(dev); msg.size = sizeof(msg.payload.u64); diff --git a/lib/vhost/rte_vhost/vhost_user.h b/lib/vhost/rte_vhost/vhost_user.h index cb5ff0a63..8d9d33de0 100644 --- a/lib/vhost/rte_vhost/vhost_user.h +++ b/lib/vhost/rte_vhost/vhost_user.h @@ -91,6 +91,7 @@ typedef enum VhostUserRequest { VHOST_USER_NVME_GET_CAP = 82, VHOST_USER_NVME_START_STOP = 83, VHOST_USER_NVME_IO_CMD = 84, + VHOST_USER_NVME_SET_BAR_MR = 85, VHOST_USER_MAX } VhostUserRequest; diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 0cacf613f..c30b28d5d 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -82,6 +82,7 @@ const struct vhost_device_ops g_spdk_vhost_ops = { .vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough, .vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call, .vhost_nvme_get_cap = spdk_vhost_nvme_get_cap, + .vhost_nvme_set_bar_mr = spdk_vhost_nvme_set_bar_mr, }; static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER( diff --git a/lib/vhost/vhost_internal.h b/lib/vhost/vhost_internal.h index 9c0ad211d..352765c79 100644 --- a/lib/vhost/vhost_internal.h +++ b/lib/vhost/vhost_internal.h @@ -267,6 +267,7 @@ void spdk_vhost_unlock(void); int spdk_remove_vhost_controller(struct spdk_vhost_dev *vdev); int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf); int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd); +int spdk_vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size); int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap); int spdk_vhost_nvme_controller_construct(void); int spdk_vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues); diff --git a/lib/vhost/vhost_nvme.c b/lib/vhost/vhost_nvme.c index 35015d93b..acd9234b7 100644 --- a/lib/vhost/vhost_nvme.c +++ b/lib/vhost/vhost_nvme.c @@ -136,6 +136,11 @@ struct spdk_vhost_nvme_dev { uint32_t num_ns; struct spdk_vhost_nvme_ns ns[MAX_NAMESPACE]; + volatile uint32_t *bar; + volatile uint32_t *bar_db; + uint64_t bar_size; + bool dataplane_started; + volatile uint32_t *dbbuf_dbs; volatile uint32_t *dbbuf_eis; struct spdk_vhost_nvme_sq sq_queue[MAX_IO_QUEUES + 1]; @@ -224,6 +229,21 @@ spdk_vhost_nvme_get_cq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid) return &dev->cq_queue[qid]; } +static inline uint32_t +spdk_vhost_nvme_get_queue_head(struct spdk_vhost_nvme_dev *nvme, uint32_t offset) +{ + if (nvme->dataplane_started) { + return nvme->dbbuf_dbs[offset]; + + } else if (nvme->bar) { + return nvme->bar_db[offset]; + } + + assert(0); + + return 0; +} + static int spdk_nvme_map_prps(struct spdk_vhost_nvme_dev *nvme, struct spdk_nvme_cmd *cmd, struct spdk_vhost_nvme_task *task, uint32_t len) @@ -309,7 +329,7 @@ spdk_nvme_cq_signal_fd(struct spdk_vhost_nvme_dev *nvme) continue; } - cq_head = nvme->dbbuf_dbs[cq_offset(qid, 1)]; + cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(qid, 1)); if (cq->irq_enabled && cq->need_signaled_cnt && (cq->cq_head != cq_head)) { eventfd_write(cq->virq, (eventfd_t)1); cq->need_signaled_cnt = 0; @@ -334,7 +354,7 @@ spdk_vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task) return; } - cq->guest_signaled_cq_head = nvme->dbbuf_dbs[cq_offset(cqid, 1)]; + cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(cqid, 1)); if (spdk_unlikely(nvme_cq_is_full(cq))) { STAILQ_INSERT_TAIL(&cq->cq_full_waited_tasks, task, stailq); return; @@ -355,7 +375,9 @@ spdk_vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task) cq->need_signaled_cnt++; /* MMIO Controll */ - nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1); + if (nvme->dataplane_started) { + nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1); + } STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq); } @@ -607,10 +629,7 @@ nvme_worker(void *arg) return -1; } - /* worker thread can't start before the admin doorbell - * buffer config command - */ - if (spdk_unlikely(!nvme->dbbuf_dbs)) { + if (spdk_unlikely(!nvme->dataplane_started && !nvme->bar)) { return -1; } @@ -624,7 +643,7 @@ nvme_worker(void *arg) if (spdk_unlikely(!cq)) { return -1; } - cq->guest_signaled_cq_head = nvme->dbbuf_dbs[cq_offset(sq->cqid, 1)]; + cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(sq->cqid, 1)); if (spdk_unlikely(!STAILQ_EMPTY(&cq->cq_full_waited_tasks) && !nvme_cq_is_full(cq))) { task = STAILQ_FIRST(&cq->cq_full_waited_tasks); @@ -632,7 +651,7 @@ nvme_worker(void *arg) spdk_vhost_nvme_task_complete(task); } - dbbuf_sq = nvme->dbbuf_dbs[sq_offset(qid, 1)]; + dbbuf_sq = spdk_vhost_nvme_get_queue_head(nvme, sq_offset(qid, 1)); sq->sq_tail = (uint16_t)dbbuf_sq; count = 0; @@ -658,7 +677,9 @@ nvme_worker(void *arg) } /* MMIO Control */ - nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1); + if (nvme->dataplane_started) { + nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1); + } /* Maximum batch I/Os to pick up at once */ if (count++ == MAX_BATCH_IO) { @@ -697,6 +718,10 @@ vhost_nvme_doorbell_buffer_config(struct spdk_vhost_nvme_dev *nvme, cpl->status.sc = 0; cpl->status.sct = 0; + + /* Data plane started */ + nvme->dataplane_started = true; + return 0; } @@ -744,6 +769,9 @@ vhost_nvme_create_io_sq(struct spdk_vhost_nvme_dev *nvme, } nvme->num_sqs++; sq->valid = true; + if (nvme->bar) { + nvme->bar_db[sq_offset(qid, 1)] = 0; + } cpl->status.sc = 0; cpl->status.sct = 0; @@ -824,6 +852,9 @@ vhost_nvme_create_io_cq(struct spdk_vhost_nvme_dev *nvme, } nvme->num_cqs++; cq->valid = true; + if (nvme->bar) { + nvme->bar_db[cq_offset(qid, 1)] = 0; + } STAILQ_INIT(&cq->cq_full_waited_tasks); cpl->status.sc = 0; @@ -890,7 +921,6 @@ spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf) struct spdk_vhost_nvme_ns *ns; int ret = 0; struct spdk_vhost_nvme_dev *nvme; - uint32_t cq_head, sq_tail; nvme = spdk_vhost_nvme_get_by_name(vid); if (!nvme) { @@ -943,10 +973,6 @@ spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf) ret = vhost_nvme_doorbell_buffer_config(nvme, req, cpl); break; case SPDK_NVME_OPC_ABORT: - sq_tail = nvme->dbbuf_dbs[sq_offset(1, 1)] & 0xffffu; - cq_head = nvme->dbbuf_dbs[cq_offset(1, 1)] & 0xffffu; - SPDK_NOTICELOG("ABORT: CID %u, SQ_TAIL %u, CQ_HEAD %u\n", - (req->cdw10 >> 16) & 0xffffu, sq_tail, cq_head); /* TODO: ABORT failed fow now */ cpl->cdw0 = 1; cpl->status.sc = 0; @@ -961,6 +987,24 @@ spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf) return 0; } +int +spdk_vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size) +{ + struct spdk_vhost_nvme_dev *nvme; + + nvme = spdk_vhost_nvme_get_by_name(vid); + if (!nvme) { + return -1; + } + + nvme->bar = (volatile uint32_t *)(uintptr_t)(bar_addr); + /* BAR0 SQ/CQ doorbell registers start from offset 0x1000 */ + nvme->bar_db = (volatile uint32_t *)(uintptr_t)(bar_addr + 0x1000ull); + nvme->bar_size = bar_size; + + return 0; +} + int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd) { @@ -1095,10 +1139,15 @@ destroy_device_poller_cb(void *arg) ns_dev->bdev_io_channel = NULL; } } + /* Clear BAR space */ + if (nvme->bar) { + memset((void *)nvme->bar, 0, nvme->bar_size); + } nvme->num_sqs = 0; nvme->num_cqs = 0; nvme->dbbuf_dbs = NULL; nvme->dbbuf_eis = NULL; + nvme->dataplane_started = false; } } diff --git a/test/unit/lib/vhost/vhost.c/vhost_ut.c b/test/unit/lib/vhost/vhost.c/vhost_ut.c index 49e879ed5..0f39c5569 100644 --- a/test/unit/lib/vhost/vhost.c/vhost_ut.c +++ b/test/unit/lib/vhost/vhost.c/vhost_ut.c @@ -105,6 +105,7 @@ DEFINE_STUB(spdk_vhost_scsi_controller_construct, int, (void), 0); DEFINE_STUB(spdk_vhost_blk_controller_construct, int, (void), 0); DEFINE_STUB(spdk_vhost_nvme_admin_passthrough, int, (int vid, void *cmd, void *cqe, void *buf), 0); DEFINE_STUB(spdk_vhost_nvme_set_cq_call, int, (int vid, uint16_t qid, int fd), 0); +DEFINE_STUB(spdk_vhost_nvme_set_bar_mr, int, (int vid, void *bar, uint64_t bar_size), 0); DEFINE_STUB(spdk_vhost_nvme_get_cap, int, (int vid, uint64_t *cap), 0); DEFINE_STUB(spdk_vhost_nvme_controller_construct, int, (void), 0); DEFINE_STUB(rte_vhost_set_vhost_vring_last_idx, int,