/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVME_PCIE_INTERNAL_H__
#define __NVME_PCIE_INTERNAL_H__

/*
 * Number of completion queue entries to process before ringing the
 * completion queue doorbell.
 */
#define NVME_MIN_COMPLETIONS		(1)
#define NVME_MAX_COMPLETIONS		(128)

/*
 * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL
 * segment.
 */
#define NVME_MAX_SGL_DESCRIPTORS	(250)

#define NVME_MAX_PRP_LIST_ENTRIES	(503)

/* PCIe transport extensions for spdk_nvme_ctrlr */
struct nvme_pcie_ctrlr {
        struct spdk_nvme_ctrlr ctrlr;

        /** NVMe MMIO register space */
        volatile struct spdk_nvme_registers *regs;

        /** NVMe MMIO register size */
        uint64_t regs_size;

        struct {
                /* BAR mapping address which contains controller memory buffer */
                void *bar_va;

                /* BAR physical address which contains controller memory buffer */
                uint64_t bar_pa;

                /* Controller memory buffer size in Bytes */
                uint64_t size;

                /* Current offset of controller memory buffer, relative to start of BAR virt addr */
                uint64_t current_offset;

                void *mem_register_addr;
                size_t mem_register_size;
        } cmb;

        struct {
                /* BAR mapping address which contains persistent memory region */
                void *bar_va;

                /* BAR physical address which contains persistent memory region */
                uint64_t bar_pa;

                /* Persistent memory region size in Bytes */
                uint64_t size;

                void *mem_register_addr;
                size_t mem_register_size;
        } pmr;

        /** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */
        uint32_t doorbell_stride_u32;

        /* Opaque handle to associated PCI device. */
        struct spdk_pci_device *devhandle;

        /* Flag to indicate the MMIO register has been remapped */
        bool is_remapped;

        volatile uint32_t *doorbell_base;
};
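
/*
 * Illustrative sketch, not part of the driver API: how the per-queue doorbell
 * registers can be derived from doorbell_base and doorbell_stride_u32.  Per
 * the NVMe specification, the SQ y tail doorbell sits at register offset
 * 0x1000 + (2y) * (4 << CAP.DSTRD) and the CQ y head doorbell at
 * 0x1000 + (2y + 1) * (4 << CAP.DSTRD); doorbell_base points at offset 0x1000
 * and doorbell_stride_u32 expresses the stride in uint32_t units.  The helper
 * names below are hypothetical and exist only for illustration.
 */
static inline volatile uint32_t *
nvme_pcie_ctrlr_sq_doorbell_example(struct nvme_pcie_ctrlr *pctrlr, uint16_t qid)
{
        /* Tail doorbell of submission queue qid */
        return pctrlr->doorbell_base + (2 * qid + 0) * pctrlr->doorbell_stride_u32;
}

static inline volatile uint32_t *
nvme_pcie_ctrlr_cq_doorbell_example(struct nvme_pcie_ctrlr *pctrlr, uint16_t qid)
{
        /* Head doorbell of completion queue qid */
        return pctrlr->doorbell_base + (2 * qid + 1) * pctrlr->doorbell_stride_u32;
}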

extern __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr;

struct nvme_tracker {
        TAILQ_ENTRY(nvme_tracker)       tq_list;

        struct nvme_request             *req;
        uint16_t                        cid;

        uint16_t                        bad_vtophys : 1;
        uint16_t                        rsvd0 : 15;
        uint32_t                        rsvd1;

        spdk_nvme_cmd_cb                cb_fn;
        void                            *cb_arg;

        uint64_t                        prp_sgl_bus_addr;

        /* Don't move, metadata SGL is always contiguous with Data Block SGL */
        struct spdk_nvme_sgl_descriptor meta_sgl;
        union {
                uint64_t                        prp[NVME_MAX_PRP_LIST_ENTRIES];
                struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS];
        } u;
};
/*
 * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary
 * and so that there is no padding required to meet alignment requirements.
 */
SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, meta_sgl) & 7) == 0, "SGL must be Qword aligned");

struct nvme_pcie_poll_group {
        struct spdk_nvme_transport_poll_group group;
        struct spdk_nvme_pcie_stat stats;
};

enum nvme_pcie_qpair_state {
        NVME_PCIE_QPAIR_WAIT_FOR_CQ = 1,
        NVME_PCIE_QPAIR_WAIT_FOR_SQ,
        NVME_PCIE_QPAIR_READY,
        NVME_PCIE_QPAIR_FAILED,
};

/* PCIe transport extensions for spdk_nvme_qpair */
struct nvme_pcie_qpair {
        /* Submission queue tail doorbell */
        volatile uint32_t *sq_tdbl;

        /* Completion queue head doorbell */
        volatile uint32_t *cq_hdbl;

        /* Submission queue */
        struct spdk_nvme_cmd *cmd;

        /* Completion queue */
        struct spdk_nvme_cpl *cpl;

        TAILQ_HEAD(, nvme_tracker) free_tr;
        TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr;

        /* Array of trackers indexed by command ID. */
        struct nvme_tracker *tr;

        struct spdk_nvme_pcie_stat *stat;

        uint16_t num_entries;

        uint8_t pcie_state;

        uint8_t retry_count;

        uint16_t max_completions_cap;

        uint16_t last_sq_tail;
        uint16_t sq_tail;
        uint16_t cq_head;
        uint16_t sq_head;

        struct {
                uint8_t phase                        : 1;
                uint8_t delay_cmd_submit             : 1;
                uint8_t has_shadow_doorbell          : 1;
                uint8_t has_pending_vtophys_failures : 1;
                uint8_t defer_destruction            : 1;
        } flags;

        /*
         * Base qpair structure.
         * This is located after the hot data in this structure so that the important parts of
         * nvme_pcie_qpair are in the same cache line.
         */
        struct spdk_nvme_qpair qpair;

        struct {
                /* Submission queue shadow tail doorbell */
                volatile uint32_t *sq_tdbl;

                /* Completion queue shadow head doorbell */
                volatile uint32_t *cq_hdbl;

                /* Submission queue event index */
                volatile uint32_t *sq_eventidx;

                /* Completion queue event index */
                volatile uint32_t *cq_eventidx;
        } shadow_doorbell;

        /*
         * Fields below this point should not be touched on the normal I/O path.
         */
        bool sq_in_cmb;
        bool shared_stats;

        uint64_t cmd_bus_addr;
        uint64_t cpl_bus_addr;

        struct spdk_nvme_cmd *sq_vaddr;
        struct spdk_nvme_cpl *cq_vaddr;
};
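
/*
 * Illustrative sketch, not part of the driver: how the phase flag above is
 * used to decide whether the completion entry at cq_head is new.  A CQ entry
 * is valid only while its phase bit (status.p) matches the phase the queue
 * currently expects; the expected phase flips each time cq_head wraps back to
 * zero.  The helper name is hypothetical and exists only for illustration.
 */
static inline bool
nvme_pcie_qpair_cqe_is_new_example(struct nvme_pcie_qpair *pqpair)
{
        struct spdk_nvme_cpl *cpl = &pqpair->cpl[pqpair->cq_head];

        /* Entry was written by the controller in the current pass over the ring. */
        return cpl->status.p == pqpair->flags.phase;
}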

static inline struct nvme_pcie_qpair *
nvme_pcie_qpair(struct spdk_nvme_qpair *qpair)
{
        return SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair);
}

static inline struct nvme_pcie_ctrlr *
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
        return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);
}

static inline int
nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
        return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old);
}

static inline bool
nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value,
                                     volatile uint32_t *shadow_db,
                                     volatile uint32_t *eventidx)
{
        uint16_t old;

        if (!shadow_db) {
                return true;
        }

        spdk_wmb();

        old = *shadow_db;
        *shadow_db = value;

        /*
         * Ensure that the doorbell is updated before reading the EventIdx from
         * memory
         */
        spdk_mb();

        if (!nvme_pcie_qpair_need_event(*eventidx, value, old)) {
                return false;
        }

        return true;
}

static inline void
nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair)
{
        struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
        struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
        bool need_mmio = true;

        if (qpair->first_fused_submitted) {
                /* This is first cmd of two fused commands - don't ring doorbell */
                qpair->first_fused_submitted = 0;
                return;
        }

        if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) {
                need_mmio = nvme_pcie_qpair_update_mmio_required(qpair,
                                pqpair->sq_tail,
                                pqpair->shadow_doorbell.sq_tdbl,
                                pqpair->shadow_doorbell.sq_eventidx);
        }

        if (spdk_likely(need_mmio)) {
                spdk_wmb();
                pqpair->stat->sq_doobell_updates++;
                g_thread_mmio_ctrlr = pctrlr;
                spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail);
                g_thread_mmio_ctrlr = NULL;
        }
}

static inline void
nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair)
{
        struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
        struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
        bool need_mmio = true;

        if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) {
                need_mmio = nvme_pcie_qpair_update_mmio_required(qpair,
                                pqpair->cq_head,
                                pqpair->shadow_doorbell.cq_hdbl,
                                pqpair->shadow_doorbell.cq_eventidx);
        }

        if (spdk_likely(need_mmio)) {
                pqpair->stat->cq_doorbell_updates++;
                g_thread_mmio_ctrlr = pctrlr;
                spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head);
                g_thread_mmio_ctrlr = NULL;
        }
}
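
/*
 * Worked example for nvme_pcie_qpair_need_event() (values are made up): with
 * event_idx = 65530, old = 65528 and new_idx = 2 (the 16-bit index rolled
 * over), the uint16_t subtractions give new_idx - event_idx = 8 and
 * new_idx - old = 10, so 8 <= 10 and the function reports that the shadow
 * doorbell write crossed the event index, i.e. the MMIO doorbell must still
 * be rung.  The unsigned wrap-around keeps this comparison correct across a
 * 16-bit roll-over.
 */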

int nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair);
int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
                              const struct spdk_nvme_io_qpair_opts *opts);
int nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries);
void nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
                struct nvme_request *req, struct spdk_nvme_cpl *cpl);
void nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair);
int nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
                                     struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
                                     void *cb_arg);
int nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
                                     struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
                                     void *cb_arg);
int nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
                                     spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
                                     spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
void nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
void nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr);
void nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
                struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
                bool print_on_error);
void nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
                                      struct spdk_nvme_cpl *cpl, bool print_on_error);
void nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr);
void nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair);
void nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair);
void nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions);
int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair);
struct spdk_nvme_qpair *nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
                const struct spdk_nvme_io_qpair_opts *opts);
int nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
int nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req);

struct spdk_nvme_transport_poll_group *nvme_pcie_poll_group_create(void);
int nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
                             struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
                                struct spdk_nvme_qpair *qpair);
int64_t nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
                uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
int nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup);

#endif