diff --git a/lib/nvme/Makefile b/lib/nvme/Makefile index 776de0b0d..888c99843 100644 --- a/lib/nvme/Makefile +++ b/lib/nvme/Makefile @@ -37,8 +37,8 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk SO_VER := 4 SO_MINOR := 2 -C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c nvme_uevent.c nvme_ctrlr_ocssd_cmd.c \ - nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c nvme_poll_group.c nvme_zns.c +C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie_common.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c nvme_uevent.c \ + nvme_ctrlr_ocssd_cmd.c nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c nvme_poll_group.c nvme_zns.c C_SRCS-$(CONFIG_RDMA) += nvme_rdma.c C_SRCS-$(CONFIG_NVME_CUSE) += nvme_cuse.c diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c index 97067bfd5..216d90206 100644 --- a/lib/nvme/nvme_pcie.c +++ b/lib/nvme/nvme_pcie.c @@ -41,174 +41,17 @@ #include "spdk/likely.h" #include "spdk/string.h" #include "nvme_internal.h" +#include "nvme_pcie_internal.h" #include "nvme_uevent.h" -/* - * Number of completion queue entries to process before ringing the - * completion queue doorbell. - */ -#define NVME_MIN_COMPLETIONS (1) -#define NVME_MAX_COMPLETIONS (128) - -/* - * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL - * segment. - */ -#define NVME_MAX_SGL_DESCRIPTORS (250) - -#define NVME_MAX_PRP_LIST_ENTRIES (503) - struct nvme_pcie_enum_ctx { struct spdk_nvme_probe_ctx *probe_ctx; struct spdk_pci_addr pci_addr; bool has_pci_addr; }; -/* PCIe transport extensions for spdk_nvme_ctrlr */ -struct nvme_pcie_ctrlr { - struct spdk_nvme_ctrlr ctrlr; - - /** NVMe MMIO register space */ - volatile struct spdk_nvme_registers *regs; - - /** NVMe MMIO register size */ - uint64_t regs_size; - - struct { - /* BAR mapping address which contains controller memory buffer */ - void *bar_va; - - /* BAR physical address which contains controller memory buffer */ - uint64_t bar_pa; - - /* Controller memory buffer size in Bytes */ - uint64_t size; - - /* Current offset of controller memory buffer, relative to start of BAR virt addr */ - uint64_t current_offset; - - void *mem_register_addr; - size_t mem_register_size; - } cmb; - - /** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */ - uint32_t doorbell_stride_u32; - - /* Opaque handle to associated PCI device. */ - struct spdk_pci_device *devhandle; - - /* Flag to indicate the MMIO register has been remapped */ - bool is_remapped; -}; - -struct nvme_tracker { - TAILQ_ENTRY(nvme_tracker) tq_list; - - struct nvme_request *req; - uint16_t cid; - - uint16_t rsvd0; - uint32_t rsvd1; - - spdk_nvme_cmd_cb cb_fn; - void *cb_arg; - - uint64_t prp_sgl_bus_addr; - - /* Don't move, metadata SGL is always contiguous with Data Block SGL */ - struct spdk_nvme_sgl_descriptor meta_sgl; - union { - uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES]; - struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS]; - } u; -}; -/* - * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary - * and so that there is no padding required to meet alignment requirements. 
- */ -SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K"); -SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned"); -SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, meta_sgl) & 7) == 0, "SGL must be Qword aligned"); - -struct nvme_pcie_poll_group { - struct spdk_nvme_transport_poll_group group; -}; - -/* PCIe transport extensions for spdk_nvme_qpair */ -struct nvme_pcie_qpair { - /* Submission queue tail doorbell */ - volatile uint32_t *sq_tdbl; - - /* Completion queue head doorbell */ - volatile uint32_t *cq_hdbl; - - /* Submission queue */ - struct spdk_nvme_cmd *cmd; - - /* Completion queue */ - struct spdk_nvme_cpl *cpl; - - TAILQ_HEAD(, nvme_tracker) free_tr; - TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr; - - /* Array of trackers indexed by command ID. */ - struct nvme_tracker *tr; - - uint16_t num_entries; - - uint8_t retry_count; - - uint16_t max_completions_cap; - - uint16_t last_sq_tail; - uint16_t sq_tail; - uint16_t cq_head; - uint16_t sq_head; - - struct { - uint8_t phase : 1; - uint8_t delay_cmd_submit : 1; - uint8_t has_shadow_doorbell : 1; - } flags; - - /* - * Base qpair structure. - * This is located after the hot data in this structure so that the important parts of - * nvme_pcie_qpair are in the same cache line. - */ - struct spdk_nvme_qpair qpair; - - struct { - /* Submission queue shadow tail doorbell */ - volatile uint32_t *sq_tdbl; - - /* Completion queue shadow head doorbell */ - volatile uint32_t *cq_hdbl; - - /* Submission queue event index */ - volatile uint32_t *sq_eventidx; - - /* Completion queue event index */ - volatile uint32_t *cq_eventidx; - } shadow_doorbell; - - /* - * Fields below this point should not be touched on the normal I/O path. 
- */ - - bool sq_in_cmb; - - uint64_t cmd_bus_addr; - uint64_t cpl_bus_addr; - - struct spdk_nvme_cmd *sq_vaddr; - struct spdk_nvme_cpl *cq_vaddr; -}; - static int nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx, struct spdk_pci_addr *pci_addr); -static int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair, - const struct spdk_nvme_io_qpair_opts *opts); static int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair); __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL; @@ -256,13 +99,6 @@ nvme_pcie_ctrlr_setup_signal(void) sigaction(SIGBUS, &sa, NULL); } -static inline struct nvme_pcie_ctrlr * -nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr) -{ - assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE); - return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr); -} - static int _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) { @@ -343,13 +179,6 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) return 0; } -static inline struct nvme_pcie_qpair * -nvme_pcie_qpair(struct spdk_nvme_qpair *qpair) -{ - assert(qpair->trtype == SPDK_NVME_TRANSPORT_PCIE); - return SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair); -} - static volatile void * nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset) { @@ -699,34 +528,6 @@ nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr) return rc; } -static int -nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries) -{ - struct nvme_pcie_qpair *pqpair; - int rc; - - pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); - if (pqpair == NULL) { - return -ENOMEM; - } - - pqpair->num_entries = num_entries; - pqpair->flags.delay_cmd_submit = 0; - - ctrlr->adminq = &pqpair->qpair; - - rc = nvme_qpair_init(ctrlr->adminq, - 0, /* qpair ID */ - ctrlr, - SPDK_NVME_QPRIO_URGENT, - num_entries); - if (rc != 0) { - return rc; - } - - return nvme_pcie_qpair_construct(ctrlr->adminq, NULL); -} - /* This function must only be called while holding g_spdk_nvme_driver->lock */ static int pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) @@ -953,201 +754,6 @@ nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) return 0; } -static void -nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr) -{ - tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp); - tr->cid = cid; - tr->req = NULL; -} - -static int -nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - uint32_t i; - - /* all head/tail vals are set to 0 */ - pqpair->last_sq_tail = pqpair->sq_tail = pqpair->sq_head = pqpair->cq_head = 0; - - /* - * First time through the completion queue, HW will set phase - * bit on completions to 1. So set this to 1 here, indicating - * we're looking for a 1 to know which entries have completed. - * we'll toggle the bit each time when the completion queue - * rolls over. 
- */ - pqpair->flags.phase = 1; - for (i = 0; i < pqpair->num_entries; i++) { - pqpair->cpl[i].status.p = 0; - } - - return 0; -} - -static void * -nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t size, uint64_t alignment, - uint64_t *phys_addr) -{ - struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); - uintptr_t addr; - - if (pctrlr->cmb.mem_register_addr != NULL) { - /* BAR is mapped for data */ - return NULL; - } - - addr = (uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset; - addr = (addr + (alignment - 1)) & ~(alignment - 1); - - /* CMB may only consume part of the BAR, calculate accordingly */ - if (addr + size > ((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.size)) { - SPDK_ERRLOG("Tried to allocate past valid CMB range!\n"); - return NULL; - } - *phys_addr = pctrlr->cmb.bar_pa + addr - (uintptr_t)pctrlr->cmb.bar_va; - - pctrlr->cmb.current_offset = (addr + size) - (uintptr_t)pctrlr->cmb.bar_va; - - return (void *)addr; -} - -static int -nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair, - const struct spdk_nvme_io_qpair_opts *opts) -{ - struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; - struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_tracker *tr; - uint16_t i; - volatile uint32_t *doorbell_base; - uint16_t num_trackers; - size_t page_align = sysconf(_SC_PAGESIZE); - size_t queue_align, queue_len; - uint32_t flags = SPDK_MALLOC_DMA; - uint64_t sq_paddr = 0; - uint64_t cq_paddr = 0; - - if (opts) { - pqpair->sq_vaddr = opts->sq.vaddr; - pqpair->cq_vaddr = opts->cq.vaddr; - sq_paddr = opts->sq.paddr; - cq_paddr = opts->cq.paddr; - } - - pqpair->retry_count = ctrlr->opts.transport_retry_count; - - /* - * Limit the maximum number of completions to return per call to prevent wraparound, - * and calculate how many trackers can be submitted at once without overflowing the - * completion queue. - */ - pqpair->max_completions_cap = pqpair->num_entries / 4; - pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS); - pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS); - num_trackers = pqpair->num_entries - pqpair->max_completions_cap; - - SPDK_INFOLOG(nvme, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n", - pqpair->max_completions_cap, num_trackers); - - assert(num_trackers != 0); - - pqpair->sq_in_cmb = false; - - if (nvme_qpair_is_admin_queue(&pqpair->qpair)) { - flags |= SPDK_MALLOC_SHARE; - } - - /* cmd and cpl rings must be aligned on page size boundaries. */ - if (ctrlr->opts.use_cmb_sqs) { - pqpair->cmd = nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd), - page_align, &pqpair->cmd_bus_addr); - if (pqpair->cmd != NULL) { - pqpair->sq_in_cmb = true; - } - } - - if (pqpair->sq_in_cmb == false) { - if (pqpair->sq_vaddr) { - pqpair->cmd = pqpair->sq_vaddr; - } else { - /* To ensure physical address contiguity we make each ring occupy - * a single hugepage only. See MAX_IO_QUEUE_ENTRIES. 
- */ - queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cmd); - queue_align = spdk_max(spdk_align32pow2(queue_len), page_align); - pqpair->cmd = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags); - if (pqpair->cmd == NULL) { - SPDK_ERRLOG("alloc qpair_cmd failed\n"); - return -ENOMEM; - } - } - if (sq_paddr) { - assert(pqpair->sq_vaddr != NULL); - pqpair->cmd_bus_addr = sq_paddr; - } else { - pqpair->cmd_bus_addr = spdk_vtophys(pqpair->cmd, NULL); - if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) { - SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n"); - return -EFAULT; - } - } - } - - if (pqpair->cq_vaddr) { - pqpair->cpl = pqpair->cq_vaddr; - } else { - queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cpl); - queue_align = spdk_max(spdk_align32pow2(queue_len), page_align); - pqpair->cpl = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags); - if (pqpair->cpl == NULL) { - SPDK_ERRLOG("alloc qpair_cpl failed\n"); - return -ENOMEM; - } - } - if (cq_paddr) { - assert(pqpair->cq_vaddr != NULL); - pqpair->cpl_bus_addr = cq_paddr; - } else { - pqpair->cpl_bus_addr = spdk_vtophys(pqpair->cpl, NULL); - if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) { - SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n"); - return -EFAULT; - } - } - - doorbell_base = &pctrlr->regs->doorbell[0].sq_tdbl; - pqpair->sq_tdbl = doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32; - pqpair->cq_hdbl = doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32; - - /* - * Reserve space for all of the trackers in a single allocation. - * struct nvme_tracker must be padded so that its size is already a power of 2. - * This ensures the PRP list embedded in the nvme_tracker object will not span a - * 4KB boundary, while allowing access to trackers in tr[] via normal array indexing. - */ - pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL, - SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); - if (pqpair->tr == NULL) { - SPDK_ERRLOG("nvme_tr failed\n"); - return -ENOMEM; - } - - TAILQ_INIT(&pqpair->free_tr); - TAILQ_INIT(&pqpair->outstanding_tr); - - for (i = 0; i < num_trackers; i++) { - tr = &pqpair->tr[i]; - nvme_qpair_construct_tracker(tr, i, spdk_vtophys(tr, NULL)); - TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); - } - - nvme_pcie_qpair_reset(qpair); - - return 0; -} - /* Used when dst points to MMIO (i.e. CMB) in a virtual machine - in these cases we must * not use wide instructions because QEMU will not emulate such instructions to MMIO space. * So this function ensures we only copy 8 bytes at a time. @@ -1181,71 +787,6 @@ nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *sr #endif } -/** - * Note: the ctrlr_lock must be held when calling this function. - */ -static void -nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair, - struct nvme_request *req, struct spdk_nvme_cpl *cpl) -{ - struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; - struct nvme_request *active_req = req; - struct spdk_nvme_ctrlr_process *active_proc; - - /* - * The admin request is from another process. Move to the per - * process list for that process to handle it later. 
- */ - assert(nvme_qpair_is_admin_queue(qpair)); - assert(active_req->pid != getpid()); - - active_proc = nvme_ctrlr_get_process(ctrlr, active_req->pid); - if (active_proc) { - /* Save the original completion information */ - memcpy(&active_req->cpl, cpl, sizeof(*cpl)); - STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq); - } else { - SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n", - active_req->pid); - - nvme_free_request(active_req); - } -} - -/** - * Note: the ctrlr_lock must be held when calling this function. - */ -static void -nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair) -{ - struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; - struct nvme_request *req, *tmp_req; - pid_t pid = getpid(); - struct spdk_nvme_ctrlr_process *proc; - - /* - * Check whether there is any pending admin request from - * other active processes. - */ - assert(nvme_qpair_is_admin_queue(qpair)); - - proc = nvme_ctrlr_get_current_process(ctrlr); - if (!proc) { - SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid); - assert(proc); - return; - } - - STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { - STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); - - assert(req->pid == pid); - - nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &req->cpl); - nvme_free_request(req); - } -} - static inline int nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) { @@ -1538,180 +1079,6 @@ nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) nvme_pcie_qpair_abort_trackers(qpair, dnr); } -static int -nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr, - struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, - void *cb_arg) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); - struct nvme_request *req; - struct spdk_nvme_cmd *cmd; - - req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); - if (req == NULL) { - return -ENOMEM; - } - - cmd = &req->cmd; - cmd->opc = SPDK_NVME_OPC_CREATE_IO_CQ; - - cmd->cdw10_bits.create_io_q.qid = io_que->id; - cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1; - - cmd->cdw11_bits.create_io_cq.pc = 1; - cmd->dptr.prp.prp1 = pqpair->cpl_bus_addr; - - return nvme_ctrlr_submit_admin_request(ctrlr, req); -} - -static int -nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr, - struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); - struct nvme_request *req; - struct spdk_nvme_cmd *cmd; - - req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); - if (req == NULL) { - return -ENOMEM; - } - - cmd = &req->cmd; - cmd->opc = SPDK_NVME_OPC_CREATE_IO_SQ; - - cmd->cdw10_bits.create_io_q.qid = io_que->id; - cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1; - cmd->cdw11_bits.create_io_sq.pc = 1; - cmd->cdw11_bits.create_io_sq.qprio = io_que->qprio; - cmd->cdw11_bits.create_io_sq.cqid = io_que->id; - cmd->dptr.prp.prp1 = pqpair->cmd_bus_addr; - - return nvme_ctrlr_submit_admin_request(ctrlr, req); -} - -static int -nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, - spdk_nvme_cmd_cb cb_fn, void *cb_arg) -{ - struct nvme_request *req; - struct spdk_nvme_cmd *cmd; - - req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); - if (req == NULL) { - return -ENOMEM; - } - - cmd = &req->cmd; - cmd->opc = SPDK_NVME_OPC_DELETE_IO_CQ; - 
cmd->cdw10_bits.delete_io_q.qid = qpair->id; - - return nvme_ctrlr_submit_admin_request(ctrlr, req); -} - -static int -nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, - spdk_nvme_cmd_cb cb_fn, void *cb_arg) -{ - struct nvme_request *req; - struct spdk_nvme_cmd *cmd; - - req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); - if (req == NULL) { - return -ENOMEM; - } - - cmd = &req->cmd; - cmd->opc = SPDK_NVME_OPC_DELETE_IO_SQ; - cmd->cdw10_bits.delete_io_q.qid = qpair->id; - - return nvme_ctrlr_submit_admin_request(ctrlr, req); -} - -static int -_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, - uint16_t qid) -{ - struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_completion_poll_status *status; - int rc; - - status = calloc(1, sizeof(*status)); - if (!status) { - SPDK_ERRLOG("Failed to allocate status tracker\n"); - return -ENOMEM; - } - - rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status); - if (rc != 0) { - free(status); - return rc; - } - - if (nvme_wait_for_completion(ctrlr->adminq, status)) { - SPDK_ERRLOG("nvme_create_io_cq failed!\n"); - if (!status->timed_out) { - free(status); - } - return -1; - } - - memset(status, 0, sizeof(*status)); - rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status); - if (rc != 0) { - free(status); - return rc; - } - - if (nvme_wait_for_completion(ctrlr->adminq, status)) { - SPDK_ERRLOG("nvme_create_io_sq failed!\n"); - if (status->timed_out) { - /* Request is still queued, the memory will be freed in a completion callback. - allocate a new request */ - status = calloc(1, sizeof(*status)); - if (!status) { - SPDK_ERRLOG("Failed to allocate status tracker\n"); - return -ENOMEM; - } - } - - memset(status, 0, sizeof(*status)); - /* Attempt to delete the completion queue */ - rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status); - if (rc != 0) { - /* The originall or newly allocated status structure can be freed since - * the corresponding request has been completed of failed to submit */ - free(status); - return -1; - } - nvme_wait_for_completion(ctrlr->adminq, status); - if (!status->timed_out) { - /* status can be freed regardless of nvme_wait_for_completion return value */ - free(status); - } - return -1; - } - - if (ctrlr->shadow_doorbell) { - pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) * - pctrlr->doorbell_stride_u32; - pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) * - pctrlr->doorbell_stride_u32; - pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) * - pctrlr->doorbell_stride_u32; - pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) * - pctrlr->doorbell_stride_u32; - pqpair->flags.has_shadow_doorbell = 1; - } else { - pqpair->flags.has_shadow_doorbell = 0; - } - nvme_pcie_qpair_reset(qpair); - free(status); - - return 0; -} - static struct spdk_nvme_qpair * nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, const struct spdk_nvme_io_qpair_opts *opts) @@ -1749,21 +1116,6 @@ nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, return qpair; } -static int -nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) -{ - if (nvme_qpair_is_admin_queue(qpair)) { - return 0; 
- } else { - return _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id); - } -} - -static void -nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) -{ -} - static int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions); @@ -2482,81 +1834,6 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_ return num_completions; } -static struct spdk_nvme_transport_poll_group * -nvme_pcie_poll_group_create(void) -{ - struct nvme_pcie_poll_group *group = calloc(1, sizeof(*group)); - - if (group == NULL) { - SPDK_ERRLOG("Unable to allocate poll group.\n"); - return NULL; - } - - return &group->group; -} - -static int -nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) -{ - return 0; -} - -static int -nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair) -{ - return 0; -} - -static int -nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, - struct spdk_nvme_qpair *qpair) -{ - return 0; -} - -static int -nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup, - struct spdk_nvme_qpair *qpair) -{ - return 0; -} - -static int64_t -nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup, - uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb) -{ - struct spdk_nvme_qpair *qpair, *tmp_qpair; - int32_t local_completions = 0; - int64_t total_completions = 0; - - STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) { - disconnected_qpair_cb(qpair, tgroup->group->ctx); - } - - STAILQ_FOREACH_SAFE(qpair, &tgroup->connected_qpairs, poll_group_stailq, tmp_qpair) { - local_completions = spdk_nvme_qpair_process_completions(qpair, completions_per_qpair); - if (local_completions < 0) { - disconnected_qpair_cb(qpair, tgroup->group->ctx); - local_completions = 0; - } - total_completions += local_completions; - } - - return total_completions; -} - -static int -nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) -{ - if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { - return -EBUSY; - } - - free(tgroup); - - return 0; -} - static struct spdk_pci_id nvme_pci_driver_id[] = { { .class_id = SPDK_PCI_CLASS_NVME, diff --git a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c new file mode 100644 index 000000000..2ebc9a688 --- /dev/null +++ b/lib/nvme/nvme_pcie_common.c @@ -0,0 +1,593 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NVMe over PCIe common library
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+#include "nvme_internal.h"
+#include "nvme_pcie_internal.h"
+
+int
+nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair)
+{
+	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
+	uint32_t i;
+
+	/* all head/tail vals are set to 0 */
+	pqpair->last_sq_tail = pqpair->sq_tail = pqpair->sq_head = pqpair->cq_head = 0;
+
+	/*
+	 * First time through the completion queue, HW will set the phase
+	 * bit on completions to 1. So set this to 1 here, indicating
+	 * we're looking for a 1 to know which entries have completed.
+	 * We'll toggle the bit each time the completion queue rolls over.
+	 */
+	pqpair->flags.phase = 1;
+	for (i = 0; i < pqpair->num_entries; i++) {
+		pqpair->cpl[i].status.p = 0;
+	}
+
+	return 0;
+}
+
+static void
+nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr)
+{
+	tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp);
+	tr->cid = cid;
+	tr->req = NULL;
+}
+
+static void *
+nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t size, uint64_t alignment,
+			  uint64_t *phys_addr)
+{
+	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
+	uintptr_t addr;
+
+	if (pctrlr->cmb.mem_register_addr != NULL) {
+		/* BAR is mapped for data */
+		return NULL;
+	}
+
+	addr = (uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset;
+	addr = (addr + (alignment - 1)) & ~(alignment - 1);
+
+	/* CMB may only consume part of the BAR, calculate accordingly */
+	if (addr + size > ((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.size)) {
+		SPDK_ERRLOG("Tried to allocate past valid CMB range!\n");
+		return NULL;
+	}
+	*phys_addr = pctrlr->cmb.bar_pa + addr - (uintptr_t)pctrlr->cmb.bar_va;
+
+	pctrlr->cmb.current_offset = (addr + size) - (uintptr_t)pctrlr->cmb.bar_va;
+
+	return (void *)addr;
+}
+
+int
+nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
+			  const struct spdk_nvme_io_qpair_opts *opts)
+{
+	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
+	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
+	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
+	struct nvme_tracker *tr;
+	uint16_t i;
+	volatile uint32_t *doorbell_base;
+	uint16_t num_trackers;
+	size_t page_align = sysconf(_SC_PAGESIZE);
+	size_t queue_align, queue_len;
+	uint32_t flags = SPDK_MALLOC_DMA;
+	uint64_t sq_paddr = 0;
+	uint64_t cq_paddr = 0;
+
+	if (opts) {
+		pqpair->sq_vaddr = opts->sq.vaddr;
+		pqpair->cq_vaddr = opts->cq.vaddr;
+		sq_paddr = opts->sq.paddr;
+		cq_paddr = opts->cq.paddr;
+	}
+
+	pqpair->retry_count = ctrlr->opts.transport_retry_count;
+
+	/*
+	 * Limit the maximum number of completions to return per call to 
prevent wraparound, + * and calculate how many trackers can be submitted at once without overflowing the + * completion queue. + */ + pqpair->max_completions_cap = pqpair->num_entries / 4; + pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS); + pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS); + num_trackers = pqpair->num_entries - pqpair->max_completions_cap; + + SPDK_INFOLOG(nvme, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n", + pqpair->max_completions_cap, num_trackers); + + assert(num_trackers != 0); + + pqpair->sq_in_cmb = false; + + if (nvme_qpair_is_admin_queue(&pqpair->qpair)) { + flags |= SPDK_MALLOC_SHARE; + } + + /* cmd and cpl rings must be aligned on page size boundaries. */ + if (ctrlr->opts.use_cmb_sqs) { + pqpair->cmd = nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd), + page_align, &pqpair->cmd_bus_addr); + if (pqpair->cmd != NULL) { + pqpair->sq_in_cmb = true; + } + } + + if (pqpair->sq_in_cmb == false) { + if (pqpair->sq_vaddr) { + pqpair->cmd = pqpair->sq_vaddr; + } else { + /* To ensure physical address contiguity we make each ring occupy + * a single hugepage only. See MAX_IO_QUEUE_ENTRIES. + */ + queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cmd); + queue_align = spdk_max(spdk_align32pow2(queue_len), page_align); + pqpair->cmd = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags); + if (pqpair->cmd == NULL) { + SPDK_ERRLOG("alloc qpair_cmd failed\n"); + return -ENOMEM; + } + } + if (sq_paddr) { + assert(pqpair->sq_vaddr != NULL); + pqpair->cmd_bus_addr = sq_paddr; + } else { + pqpair->cmd_bus_addr = spdk_vtophys(pqpair->cmd, NULL); + if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) { + SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n"); + return -EFAULT; + } + } + } + + if (pqpair->cq_vaddr) { + pqpair->cpl = pqpair->cq_vaddr; + } else { + queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cpl); + queue_align = spdk_max(spdk_align32pow2(queue_len), page_align); + pqpair->cpl = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags); + if (pqpair->cpl == NULL) { + SPDK_ERRLOG("alloc qpair_cpl failed\n"); + return -ENOMEM; + } + } + if (cq_paddr) { + assert(pqpair->cq_vaddr != NULL); + pqpair->cpl_bus_addr = cq_paddr; + } else { + pqpair->cpl_bus_addr = spdk_vtophys(pqpair->cpl, NULL); + if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) { + SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n"); + return -EFAULT; + } + } + + doorbell_base = &pctrlr->regs->doorbell[0].sq_tdbl; + pqpair->sq_tdbl = doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32; + pqpair->cq_hdbl = doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32; + + /* + * Reserve space for all of the trackers in a single allocation. + * struct nvme_tracker must be padded so that its size is already a power of 2. + * This ensures the PRP list embedded in the nvme_tracker object will not span a + * 4KB boundary, while allowing access to trackers in tr[] via normal array indexing. 
+ */ + pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (pqpair->tr == NULL) { + SPDK_ERRLOG("nvme_tr failed\n"); + return -ENOMEM; + } + + TAILQ_INIT(&pqpair->free_tr); + TAILQ_INIT(&pqpair->outstanding_tr); + + for (i = 0; i < num_trackers; i++) { + tr = &pqpair->tr[i]; + nvme_qpair_construct_tracker(tr, i, spdk_vtophys(tr, NULL)); + TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); + } + + nvme_pcie_qpair_reset(qpair); + + return 0; +} + +int +nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries) +{ + struct nvme_pcie_qpair *pqpair; + int rc; + + pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (pqpair == NULL) { + return -ENOMEM; + } + + pqpair->num_entries = num_entries; + pqpair->flags.delay_cmd_submit = 0; + + ctrlr->adminq = &pqpair->qpair; + + rc = nvme_qpair_init(ctrlr->adminq, + 0, /* qpair ID */ + ctrlr, + SPDK_NVME_QPRIO_URGENT, + num_entries); + if (rc != 0) { + return rc; + } + + return nvme_pcie_qpair_construct(ctrlr->adminq, NULL); +} + +/** + * Note: the ctrlr_lock must be held when calling this function. + */ +void +nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req, struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_request *active_req = req; + struct spdk_nvme_ctrlr_process *active_proc; + + /* + * The admin request is from another process. Move to the per + * process list for that process to handle it later. + */ + assert(nvme_qpair_is_admin_queue(qpair)); + assert(active_req->pid != getpid()); + + active_proc = nvme_ctrlr_get_process(ctrlr, active_req->pid); + if (active_proc) { + /* Save the original completion information */ + memcpy(&active_req->cpl, cpl, sizeof(*cpl)); + STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq); + } else { + SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n", + active_req->pid); + + nvme_free_request(active_req); + } +} + +/** + * Note: the ctrlr_lock must be held when calling this function. + */ +void +nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_request *req, *tmp_req; + pid_t pid = getpid(); + struct spdk_nvme_ctrlr_process *proc; + + /* + * Check whether there is any pending admin request from + * other active processes. 
+ */ + assert(nvme_qpair_is_admin_queue(qpair)); + + proc = nvme_ctrlr_get_current_process(ctrlr); + if (!proc) { + SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid); + assert(proc); + return; + } + + STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { + STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); + + assert(req->pid == pid); + + nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &req->cpl); + nvme_free_request(req); + } +} + +int +nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, + void *cb_arg) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_CREATE_IO_CQ; + + cmd->cdw10_bits.create_io_q.qid = io_que->id; + cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1; + + cmd->cdw11_bits.create_io_cq.pc = 1; + cmd->dptr.prp.prp1 = pqpair->cpl_bus_addr; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +int +nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_CREATE_IO_SQ; + + cmd->cdw10_bits.create_io_q.qid = io_que->id; + cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1; + cmd->cdw11_bits.create_io_sq.pc = 1; + cmd->cdw11_bits.create_io_sq.qprio = io_que->qprio; + cmd->cdw11_bits.create_io_sq.cqid = io_que->id; + cmd->dptr.prp.prp1 = pqpair->cmd_bus_addr; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +int +nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_DELETE_IO_CQ; + cmd->cdw10_bits.delete_io_q.qid = qpair->id; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +int +nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_DELETE_IO_SQ; + cmd->cdw10_bits.delete_io_q.qid = qpair->id; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +static int +_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + uint16_t qid) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_completion_poll_status *status; + int rc; + + status = calloc(1, sizeof(*status)); + if (!status) { + SPDK_ERRLOG("Failed to allocate status tracker\n"); + return -ENOMEM; + } + + rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status); + if (rc != 0) { + free(status); + return rc; + } + + if 
(nvme_wait_for_completion(ctrlr->adminq, status)) {
+		SPDK_ERRLOG("nvme_create_io_cq failed!\n");
+		if (!status->timed_out) {
+			free(status);
+		}
+		return -1;
+	}
+
+	memset(status, 0, sizeof(*status));
+	rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
+	if (rc != 0) {
+		free(status);
+		return rc;
+	}
+
+	if (nvme_wait_for_completion(ctrlr->adminq, status)) {
+		SPDK_ERRLOG("nvme_create_io_sq failed!\n");
+		if (status->timed_out) {
+			/* Request is still queued; the memory will be freed in a completion callback.
+			 * Allocate a new status tracker. */
+			status = calloc(1, sizeof(*status));
+			if (!status) {
+				SPDK_ERRLOG("Failed to allocate status tracker\n");
+				return -ENOMEM;
+			}
+		}
+
+		memset(status, 0, sizeof(*status));
+		/* Attempt to delete the completion queue */
+		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
+		if (rc != 0) {
+			/* The original or newly allocated status structure can be freed since
+			 * the corresponding request has been completed or failed to submit */
+			free(status);
+			return -1;
+		}
+		nvme_wait_for_completion(ctrlr->adminq, status);
+		if (!status->timed_out) {
+			/* status can be freed regardless of nvme_wait_for_completion return value */
+			free(status);
+		}
+		return -1;
+	}
+
+	if (ctrlr->shadow_doorbell) {
+		pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
+						  pctrlr->doorbell_stride_u32;
+		pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
+						  pctrlr->doorbell_stride_u32;
+		pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
+						      pctrlr->doorbell_stride_u32;
+		pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
+						      pctrlr->doorbell_stride_u32;
+		pqpair->flags.has_shadow_doorbell = 1;
+	} else {
+		pqpair->flags.has_shadow_doorbell = 0;
+	}
+	nvme_pcie_qpair_reset(qpair);
+	free(status);
+
+	return 0;
+}
+
+int
+nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
+{
+	if (nvme_qpair_is_admin_queue(qpair)) {
+		return 0;
+	} else {
+		return _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id);
+	}
+}
+
+void
+nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
+{
+}
+
+struct spdk_nvme_transport_poll_group *
+nvme_pcie_poll_group_create(void)
+{
+	struct nvme_pcie_poll_group *group = calloc(1, sizeof(*group));
+
+	if (group == NULL) {
+		SPDK_ERRLOG("Unable to allocate poll group.\n");
+		return NULL;
+	}
+
+	return &group->group;
+}
+
+int
+nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
+{
+	return 0;
+}
+
+int
+nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
+{
+	return 0;
+}
+
+int
+nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
+			 struct spdk_nvme_qpair *qpair)
+{
+	return 0;
+}
+
+int
+nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
+			    struct spdk_nvme_qpair *qpair)
+{
+	return 0;
+}
+
+int64_t
+nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
+		uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
+{
+	struct spdk_nvme_qpair *qpair, *tmp_qpair;
+	int32_t local_completions = 0;
+	int64_t total_completions = 0;
+
+	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
+		disconnected_qpair_cb(qpair, tgroup->group->ctx);
+	}
+
+	STAILQ_FOREACH_SAFE(qpair, &tgroup->connected_qpairs, poll_group_stailq, 
tmp_qpair) { + local_completions = spdk_nvme_qpair_process_completions(qpair, completions_per_qpair); + if (local_completions < 0) { + disconnected_qpair_cb(qpair, tgroup->group->ctx); + local_completions = 0; + } + total_completions += local_completions; + } + + return total_completions; +} + +int +nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) +{ + if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { + return -EBUSY; + } + + free(tgroup); + + return 0; +} diff --git a/lib/nvme/nvme_pcie_internal.h b/lib/nvme/nvme_pcie_internal.h new file mode 100644 index 000000000..217b2d6e2 --- /dev/null +++ b/lib/nvme/nvme_pcie_internal.h @@ -0,0 +1,235 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __NVME_PCIE_INTERNAL_H__ +#define __NVME_PCIE_INTERNAL_H__ + +/* + * Number of completion queue entries to process before ringing the + * completion queue doorbell. + */ +#define NVME_MIN_COMPLETIONS (1) +#define NVME_MAX_COMPLETIONS (128) + +/* + * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL + * segment. + */ +#define NVME_MAX_SGL_DESCRIPTORS (250) + +#define NVME_MAX_PRP_LIST_ENTRIES (503) + +/* PCIe transport extensions for spdk_nvme_ctrlr */ +struct nvme_pcie_ctrlr { + struct spdk_nvme_ctrlr ctrlr; + + /** NVMe MMIO register space */ + volatile struct spdk_nvme_registers *regs; + + /** NVMe MMIO register size */ + uint64_t regs_size; + + struct { + /* BAR mapping address which contains controller memory buffer */ + void *bar_va; + + /* BAR physical address which contains controller memory buffer */ + uint64_t bar_pa; + + /* Controller memory buffer size in Bytes */ + uint64_t size; + + /* Current offset of controller memory buffer, relative to start of BAR virt addr */ + uint64_t current_offset; + + void *mem_register_addr; + size_t mem_register_size; + } cmb; + + /** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) 
*/ + uint32_t doorbell_stride_u32; + + /* Opaque handle to associated PCI device. */ + struct spdk_pci_device *devhandle; + + /* Flag to indicate the MMIO register has been remapped */ + bool is_remapped; +}; + +struct nvme_tracker { + TAILQ_ENTRY(nvme_tracker) tq_list; + + struct nvme_request *req; + uint16_t cid; + + uint16_t rsvd0; + uint32_t rsvd1; + + spdk_nvme_cmd_cb cb_fn; + void *cb_arg; + + uint64_t prp_sgl_bus_addr; + + /* Don't move, metadata SGL is always contiguous with Data Block SGL */ + struct spdk_nvme_sgl_descriptor meta_sgl; + union { + uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES]; + struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS]; + } u; +}; +/* + * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary + * and so that there is no padding required to meet alignment requirements. + */ +SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K"); +SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned"); +SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, meta_sgl) & 7) == 0, "SGL must be Qword aligned"); + +struct nvme_pcie_poll_group { + struct spdk_nvme_transport_poll_group group; +}; + +/* PCIe transport extensions for spdk_nvme_qpair */ +struct nvme_pcie_qpair { + /* Submission queue tail doorbell */ + volatile uint32_t *sq_tdbl; + + /* Completion queue head doorbell */ + volatile uint32_t *cq_hdbl; + + /* Submission queue */ + struct spdk_nvme_cmd *cmd; + + /* Completion queue */ + struct spdk_nvme_cpl *cpl; + + TAILQ_HEAD(, nvme_tracker) free_tr; + TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr; + + /* Array of trackers indexed by command ID. */ + struct nvme_tracker *tr; + + uint16_t num_entries; + + uint8_t retry_count; + + uint16_t max_completions_cap; + + uint16_t last_sq_tail; + uint16_t sq_tail; + uint16_t cq_head; + uint16_t sq_head; + + struct { + uint8_t phase : 1; + uint8_t delay_cmd_submit : 1; + uint8_t has_shadow_doorbell : 1; + } flags; + + /* + * Base qpair structure. + * This is located after the hot data in this structure so that the important parts of + * nvme_pcie_qpair are in the same cache line. + */ + struct spdk_nvme_qpair qpair; + + struct { + /* Submission queue shadow tail doorbell */ + volatile uint32_t *sq_tdbl; + + /* Completion queue shadow head doorbell */ + volatile uint32_t *cq_hdbl; + + /* Submission queue event index */ + volatile uint32_t *sq_eventidx; + + /* Completion queue event index */ + volatile uint32_t *cq_eventidx; + } shadow_doorbell; + + /* + * Fields below this point should not be touched on the normal I/O path. 
+ */ + + bool sq_in_cmb; + + uint64_t cmd_bus_addr; + uint64_t cpl_bus_addr; + + struct spdk_nvme_cmd *sq_vaddr; + struct spdk_nvme_cpl *cq_vaddr; +}; + +static inline struct nvme_pcie_qpair * +nvme_pcie_qpair(struct spdk_nvme_qpair *qpair) +{ + return SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair); +} + +static inline struct nvme_pcie_ctrlr * +nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr); +} + +int nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair); +int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair, + const struct spdk_nvme_io_qpair_opts *opts); +int nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries); +void nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req, struct spdk_nvme_cpl *cpl); +void nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair); +int nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, + void *cb_arg); +int nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); +void nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); + +struct spdk_nvme_transport_poll_group *nvme_pcie_poll_group_create(void); +int nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair); +int nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair); +int nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, + struct spdk_nvme_qpair *qpair); +int nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup, + struct spdk_nvme_qpair *qpair); +int64_t nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup, + uint32_t completions_per_qpair, + spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); +int nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup); + +#endif diff --git a/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c b/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c index 49bfa74a8..5956bceb5 100644 --- a/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c +++ b/test/unit/lib/nvme/nvme_pcie.c/nvme_pcie_ut.c @@ -38,6 +38,7 @@ #define UNIT_TEST_NO_VTOPHYS #include "nvme/nvme_pcie.c" +#include "nvme/nvme_pcie_common.c" #include "common/lib/nvme/common_stubs.h" pid_t g_spdk_nvme_pid;