diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c index 098fc64b2..27fc6bbdd 100644 --- a/lib/nvme/nvme_pcie.c +++ b/lib/nvme/nvme_pcie.c @@ -52,9 +52,7 @@ struct nvme_pcie_enum_ctx { static int nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx, struct spdk_pci_addr *pci_addr); -static int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair); -__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL; static uint16_t g_signal_lock; static bool g_sigset = false; @@ -755,245 +753,6 @@ nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) return 0; } -/* Used when dst points to MMIO (i.e. CMB) in a virtual machine - in these cases we must - * not use wide instructions because QEMU will not emulate such instructions to MMIO space. - * So this function ensures we only copy 8 bytes at a time. - */ -static inline void -nvme_pcie_copy_command_mmio(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src) -{ - uint64_t *dst64 = (uint64_t *)dst; - const uint64_t *src64 = (const uint64_t *)src; - uint32_t i; - - for (i = 0; i < sizeof(*dst) / 8; i++) { - dst64[i] = src64[i]; - } -} - -static inline void -nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src) -{ - /* dst and src are known to be non-overlapping and 64-byte aligned. */ -#if defined(__SSE2__) - __m128i *d128 = (__m128i *)dst; - const __m128i *s128 = (const __m128i *)src; - - _mm_stream_si128(&d128[0], _mm_load_si128(&s128[0])); - _mm_stream_si128(&d128[1], _mm_load_si128(&s128[1])); - _mm_stream_si128(&d128[2], _mm_load_si128(&s128[2])); - _mm_stream_si128(&d128[3], _mm_load_si128(&s128[3])); -#else - *dst = *src; -#endif -} - -static inline int -nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) -{ - return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old); -} - -static bool -nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value, - volatile uint32_t *shadow_db, - volatile uint32_t *eventidx) -{ - uint16_t old; - - if (!shadow_db) { - return true; - } - - old = *shadow_db; - *shadow_db = value; - - /* - * Ensure that the doorbell is updated before reading the EventIdx from - * memory - */ - spdk_mb(); - - if (!nvme_pcie_qpair_need_event(*eventidx, value, old)) { - return false; - } - - return true; -} - -static inline void -nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr); - bool need_mmio = true; - - if (qpair->first_fused_submitted) { - /* This is first cmd of two fused commands - don't ring doorbell */ - qpair->first_fused_submitted = 0; - return; - } - - if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) { - need_mmio = nvme_pcie_qpair_update_mmio_required(qpair, - pqpair->sq_tail, - pqpair->shadow_doorbell.sq_tdbl, - pqpair->shadow_doorbell.sq_eventidx); - } - - if (spdk_likely(need_mmio)) { - spdk_wmb(); - g_thread_mmio_ctrlr = pctrlr; - spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail); - g_thread_mmio_ctrlr = NULL; - } -} - -static inline void -nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr); - bool need_mmio = true; - - if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) { - need_mmio = nvme_pcie_qpair_update_mmio_required(qpair, - pqpair->cq_head, - pqpair->shadow_doorbell.cq_hdbl, - pqpair->shadow_doorbell.cq_eventidx); 
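For reference, the EventIdx test removed from nvme_pcie.c here (and re-added to nvme_pcie_internal.h later in this patch) decides whether an MMIO doorbell write is still needed when the controller supports shadow doorbells: the write is only required when the controller's advertised EventIdx lies between the previously written doorbell value and the new one, with uint16_t wrap-around handled by unsigned subtraction. A minimal standalone sketch of that check, with made-up values and not part of the patch itself:

#include <assert.h>
#include <stdint.h>

/* Same expression as nvme_pcie_qpair_need_event() in the hunk above. */
static int
need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old);
}

int
main(void)
{
	/* EventIdx 5 lies between old=3 and new=8, so the MMIO doorbell must be rung. */
	assert(need_event(5, 8, 3));
	/* EventIdx 20 is ahead of the new tail; the controller is still polling the
	 * shadow doorbell, so the MMIO write can be skipped. */
	assert(!need_event(20, 8, 3));
	/* Wrap-around case: 0xFFFF lies between old=0xFFFE and new=2. */
	assert(need_event(0xFFFF, 2, 0xFFFE));
	return 0;
}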
- } - - if (spdk_likely(need_mmio)) { - g_thread_mmio_ctrlr = pctrlr; - spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head); - g_thread_mmio_ctrlr = NULL; - } -} - -static void -nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) -{ - struct nvme_request *req; - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; - - req = tr->req; - assert(req != NULL); - - if (req->cmd.fuse == SPDK_NVME_IO_FLAGS_FUSE_FIRST) { - /* This is first cmd of two fused commands - don't ring doorbell */ - qpair->first_fused_submitted = 1; - } - - /* Don't use wide instructions to copy NVMe command, this is limited by QEMU - * virtual NVMe controller, the maximum access width is 8 Bytes for one time. - */ - if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH) && pqpair->sq_in_cmb)) { - nvme_pcie_copy_command_mmio(&pqpair->cmd[pqpair->sq_tail], &req->cmd); - } else { - /* Copy the command from the tracker to the submission queue. */ - nvme_pcie_copy_command(&pqpair->cmd[pqpair->sq_tail], &req->cmd); - } - - if (spdk_unlikely(++pqpair->sq_tail == pqpair->num_entries)) { - pqpair->sq_tail = 0; - } - - if (spdk_unlikely(pqpair->sq_tail == pqpair->sq_head)) { - SPDK_ERRLOG("sq_tail is passing sq_head!\n"); - } - - if (!pqpair->flags.delay_cmd_submit) { - nvme_pcie_qpair_ring_sq_doorbell(qpair); - } -} - -static void -nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, - struct spdk_nvme_cpl *cpl, bool print_on_error) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_request *req; - bool retry, error; - bool req_from_current_proc = true; - - req = tr->req; - - assert(req != NULL); - - error = spdk_nvme_cpl_is_error(cpl); - retry = error && nvme_completion_is_retry(cpl) && - req->retries < pqpair->retry_count; - - if (error && print_on_error && !qpair->ctrlr->opts.disable_error_logging) { - spdk_nvme_qpair_print_command(qpair, &req->cmd); - spdk_nvme_qpair_print_completion(qpair, cpl); - } - - assert(cpl->cid == req->cmd.cid); - - if (retry) { - req->retries++; - nvme_pcie_qpair_submit_tracker(qpair, tr); - } else { - TAILQ_REMOVE(&pqpair->outstanding_tr, tr, tq_list); - - /* Only check admin requests from different processes. 
*/ - if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) { - req_from_current_proc = false; - nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl); - } else { - nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl); - } - - if (req_from_current_proc == true) { - nvme_qpair_free_request(qpair, req); - } - - tr->req = NULL; - - TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); - } -} - -static void -nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair, - struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr, - bool print_on_error) -{ - struct spdk_nvme_cpl cpl; - - memset(&cpl, 0, sizeof(cpl)); - cpl.sqid = qpair->id; - cpl.cid = tr->cid; - cpl.status.sct = sct; - cpl.status.sc = sc; - cpl.status.dnr = dnr; - nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); -} - -static void -nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_tracker *tr, *temp, *last; - - last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head); - - /* Abort previously submitted (outstanding) trs */ - TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) { - if (!qpair->ctrlr->opts.disable_error_logging) { - SPDK_ERRLOG("aborting outstanding command\n"); - } - nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, - SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true); - - if (tr == last) { - break; - } - } -} - static int nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, int (*iter_fn)(struct nvme_request *req, void *arg), @@ -1017,172 +776,6 @@ nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, return 0; } -static void -nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_tracker *tr; - - tr = TAILQ_FIRST(&pqpair->outstanding_tr); - while (tr != NULL) { - assert(tr->req != NULL); - if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { - nvme_pcie_qpair_manual_complete_tracker(qpair, tr, - SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0, - false); - tr = TAILQ_FIRST(&pqpair->outstanding_tr); - } else { - tr = TAILQ_NEXT(tr, tq_list); - } - } -} - -static void -nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair) -{ - nvme_pcie_admin_qpair_abort_aers(qpair); -} - -static int -nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - - if (nvme_qpair_is_admin_queue(qpair)) { - nvme_pcie_admin_qpair_destroy(qpair); - } - /* - * We check sq_vaddr and cq_vaddr to see if the user specified the memory - * buffers when creating the I/O queue. - * If the user specified them, we cannot free that memory. - * Nor do we free it if it's in the CMB. 
- */ - if (!pqpair->sq_vaddr && pqpair->cmd && !pqpair->sq_in_cmb) { - spdk_free(pqpair->cmd); - } - if (!pqpair->cq_vaddr && pqpair->cpl) { - spdk_free(pqpair->cpl); - } - if (pqpair->tr) { - spdk_free(pqpair->tr); - } - - nvme_qpair_deinit(qpair); - - spdk_free(pqpair); - - return 0; -} - -static void -nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) -{ - nvme_pcie_qpair_abort_trackers(qpair, dnr); -} - -static struct spdk_nvme_qpair * -nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, - const struct spdk_nvme_io_qpair_opts *opts) -{ - struct nvme_pcie_qpair *pqpair; - struct spdk_nvme_qpair *qpair; - int rc; - - assert(ctrlr != NULL); - - pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, - SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); - if (pqpair == NULL) { - return NULL; - } - - pqpair->num_entries = opts->io_queue_size; - pqpair->flags.delay_cmd_submit = opts->delay_cmd_submit; - - qpair = &pqpair->qpair; - - rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests); - if (rc != 0) { - nvme_pcie_qpair_destroy(qpair); - return NULL; - } - - rc = nvme_pcie_qpair_construct(qpair, opts); - - if (rc != 0) { - nvme_pcie_qpair_destroy(qpair); - return NULL; - } - - return qpair; -} - -static int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, - uint32_t max_completions); - -static int -nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) -{ - struct nvme_completion_poll_status *status; - int rc; - - assert(ctrlr != NULL); - - if (ctrlr->is_removed) { - goto free; - } - - status = calloc(1, sizeof(*status)); - if (!status) { - SPDK_ERRLOG("Failed to allocate status tracker\n"); - return -ENOMEM; - } - - /* Delete the I/O submission queue */ - rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, status); - if (rc != 0) { - SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc); - free(status); - return rc; - } - if (nvme_wait_for_completion(ctrlr->adminq, status)) { - if (!status->timed_out) { - free(status); - } - return -1; - } - - /* Now that the submission queue is deleted, the device is supposed to have - * completed any outstanding I/O. Try to complete them. If they don't complete, - * they'll be marked as aborted and completed below. */ - nvme_pcie_qpair_process_completions(qpair, 0); - - memset(status, 0, sizeof(*status)); - /* Delete the completion queue */ - rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status); - if (rc != 0) { - SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc); - free(status); - return rc; - } - if (nvme_wait_for_completion(ctrlr->adminq, status)) { - if (!status->timed_out) { - free(status); - } - return -1; - } - free(status); - -free: - if (qpair->no_deletion_notification_needed == 0) { - /* Abort the rest of the I/O */ - nvme_pcie_qpair_abort_trackers(qpair, 1); - } - - nvme_pcie_qpair_destroy(qpair); - return 0; -} - static void nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) { @@ -1695,154 +1288,6 @@ exit: return rc; } -static void -nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair) -{ - uint64_t t02; - struct nvme_tracker *tr, *tmp; - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; - struct spdk_nvme_ctrlr_process *active_proc; - - /* Don't check timeouts during controller initialization. 
*/ - if (ctrlr->state != NVME_CTRLR_STATE_READY) { - return; - } - - if (nvme_qpair_is_admin_queue(qpair)) { - active_proc = nvme_ctrlr_get_current_process(ctrlr); - } else { - active_proc = qpair->active_proc; - } - - /* Only check timeouts if the current process has a timeout callback. */ - if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { - return; - } - - t02 = spdk_get_ticks(); - TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) { - assert(tr->req != NULL); - - if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, t02)) { - /* - * The requests are in order, so as soon as one has not timed out, - * stop iterating. - */ - break; - } - } -} - -static int32_t -nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_tracker *tr; - struct spdk_nvme_cpl *cpl, *next_cpl; - uint32_t num_completions = 0; - struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; - uint16_t next_cq_head; - uint8_t next_phase; - bool next_is_valid = false; - - if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { - nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); - } - - if (max_completions == 0 || max_completions > pqpair->max_completions_cap) { - /* - * max_completions == 0 means unlimited, but complete at most - * max_completions_cap batch of I/O at a time so that the completion - * queue doorbells don't wrap around. - */ - max_completions = pqpair->max_completions_cap; - } - - while (1) { - cpl = &pqpair->cpl[pqpair->cq_head]; - - if (!next_is_valid && cpl->status.p != pqpair->flags.phase) { - break; - } - - if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) { - next_cq_head = pqpair->cq_head + 1; - next_phase = pqpair->flags.phase; - } else { - next_cq_head = 0; - next_phase = !pqpair->flags.phase; - } - next_cpl = &pqpair->cpl[next_cq_head]; - next_is_valid = (next_cpl->status.p == next_phase); - if (next_is_valid) { - __builtin_prefetch(&pqpair->tr[next_cpl->cid]); - } - -#ifdef __PPC64__ - /* - * This memory barrier prevents reordering of: - * - load after store from/to tr - * - load after load cpl phase and cpl cid - */ - spdk_mb(); -#elif defined(__aarch64__) - __asm volatile("dmb oshld" ::: "memory"); -#endif - - if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) { - pqpair->cq_head = 0; - pqpair->flags.phase = !pqpair->flags.phase; - } - - tr = &pqpair->tr[cpl->cid]; - /* Prefetch the req's STAILQ_ENTRY since we'll need to access it - * as part of putting the req back on the qpair's free list. - */ - __builtin_prefetch(&tr->req->stailq); - pqpair->sq_head = cpl->sqhd; - - if (tr->req) { - nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true); - } else { - SPDK_ERRLOG("cpl does not map to outstanding cmd\n"); - spdk_nvme_qpair_print_completion(qpair, cpl); - assert(0); - } - - if (++num_completions == max_completions) { - break; - } - } - - if (num_completions > 0) { - nvme_pcie_qpair_ring_cq_doorbell(qpair); - } - - if (pqpair->flags.delay_cmd_submit) { - if (pqpair->last_sq_tail != pqpair->sq_tail) { - nvme_pcie_qpair_ring_sq_doorbell(qpair); - pqpair->last_sq_tail = pqpair->sq_tail; - } - } - - if (spdk_unlikely(ctrlr->timeout_enabled)) { - /* - * User registered for timeout callback - */ - nvme_pcie_qpair_check_timeout(qpair); - } - - /* Before returning, complete any pending admin request. 
*/ - if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { - nvme_pcie_qpair_complete_pending_admin_request(qpair); - - nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); - } - - return num_completions; -} - static struct spdk_pci_id nvme_pci_driver_id[] = { { .class_id = SPDK_PCI_CLASS_NVME, diff --git a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c index 9fef12cb6..0ef56cb9e 100644 --- a/lib/nvme/nvme_pcie_common.c +++ b/lib/nvme/nvme_pcie_common.c @@ -40,6 +40,8 @@ #include "nvme_internal.h" #include "nvme_pcie_internal.h" +__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL; + static uint64_t nvme_pcie_vtophys(struct spdk_nvme_ctrlr *ctrlr, const void *buf) { @@ -526,6 +528,474 @@ nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme { } +/* Used when dst points to MMIO (i.e. CMB) in a virtual machine - in these cases we must + * not use wide instructions because QEMU will not emulate such instructions to MMIO space. + * So this function ensures we only copy 8 bytes at a time. + */ +static inline void +nvme_pcie_copy_command_mmio(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src) +{ + uint64_t *dst64 = (uint64_t *)dst; + const uint64_t *src64 = (const uint64_t *)src; + uint32_t i; + + for (i = 0; i < sizeof(*dst) / 8; i++) { + dst64[i] = src64[i]; + } +} + +static inline void +nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src) +{ + /* dst and src are known to be non-overlapping and 64-byte aligned. */ +#if defined(__SSE2__) + __m128i *d128 = (__m128i *)dst; + const __m128i *s128 = (const __m128i *)src; + + _mm_stream_si128(&d128[0], _mm_load_si128(&s128[0])); + _mm_stream_si128(&d128[1], _mm_load_si128(&s128[1])); + _mm_stream_si128(&d128[2], _mm_load_si128(&s128[2])); + _mm_stream_si128(&d128[3], _mm_load_si128(&s128[3])); +#else + *dst = *src; +#endif +} + +void +nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) +{ + struct nvme_request *req; + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + + req = tr->req; + assert(req != NULL); + + if (req->cmd.fuse == SPDK_NVME_IO_FLAGS_FUSE_FIRST) { + /* This is first cmd of two fused commands - don't ring doorbell */ + qpair->first_fused_submitted = 1; + } + + /* Don't use wide instructions to copy NVMe command, this is limited by QEMU + * virtual NVMe controller, the maximum access width is 8 Bytes for one time. + */ + if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH) && pqpair->sq_in_cmb)) { + nvme_pcie_copy_command_mmio(&pqpair->cmd[pqpair->sq_tail], &req->cmd); + } else { + /* Copy the command from the tracker to the submission queue. 
*/ + nvme_pcie_copy_command(&pqpair->cmd[pqpair->sq_tail], &req->cmd); + } + + if (spdk_unlikely(++pqpair->sq_tail == pqpair->num_entries)) { + pqpair->sq_tail = 0; + } + + if (spdk_unlikely(pqpair->sq_tail == pqpair->sq_head)) { + SPDK_ERRLOG("sq_tail is passing sq_head!\n"); + } + + if (!pqpair->flags.delay_cmd_submit) { + nvme_pcie_qpair_ring_sq_doorbell(qpair); + } +} + +void +nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, + struct spdk_nvme_cpl *cpl, bool print_on_error) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_request *req; + bool retry, error; + bool req_from_current_proc = true; + + req = tr->req; + + assert(req != NULL); + + error = spdk_nvme_cpl_is_error(cpl); + retry = error && nvme_completion_is_retry(cpl) && + req->retries < pqpair->retry_count; + + if (error && print_on_error && !qpair->ctrlr->opts.disable_error_logging) { + spdk_nvme_qpair_print_command(qpair, &req->cmd); + spdk_nvme_qpair_print_completion(qpair, cpl); + } + + assert(cpl->cid == req->cmd.cid); + + if (retry) { + req->retries++; + nvme_pcie_qpair_submit_tracker(qpair, tr); + } else { + TAILQ_REMOVE(&pqpair->outstanding_tr, tr, tq_list); + + /* Only check admin requests from different processes. */ + if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) { + req_from_current_proc = false; + nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl); + } else { + nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl); + } + + if (req_from_current_proc == true) { + nvme_qpair_free_request(qpair, req); + } + + tr->req = NULL; + + TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); + } +} + +void +nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair, + struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr, + bool print_on_error) +{ + struct spdk_nvme_cpl cpl; + + memset(&cpl, 0, sizeof(cpl)); + cpl.sqid = qpair->id; + cpl.cid = tr->cid; + cpl.status.sct = sct; + cpl.status.sc = sc; + cpl.status.dnr = dnr; + nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); +} + +void +nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr, *temp, *last; + + last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head); + + /* Abort previously submitted (outstanding) trs */ + TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) { + if (!qpair->ctrlr->opts.disable_error_logging) { + SPDK_ERRLOG("aborting outstanding command\n"); + } + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, + SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true); + + if (tr == last) { + break; + } + } +} + +void +nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr; + + tr = TAILQ_FIRST(&pqpair->outstanding_tr); + while (tr != NULL) { + assert(tr->req != NULL); + if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, + SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0, + false); + tr = TAILQ_FIRST(&pqpair->outstanding_tr); + } else { + tr = TAILQ_NEXT(tr, tq_list); + } + } +} + +void +nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair) +{ + nvme_pcie_admin_qpair_abort_aers(qpair); +} + +void +nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) +{ + nvme_pcie_qpair_abort_trackers(qpair, dnr); 
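The submit path moved here only rings the SQ doorbell immediately when delay_cmd_submit is off; with it on, nvme_pcie_qpair_process_completions() batches the doorbell write for everything queued since the last poll. A caller opts into that batching through the standard I/O qpair options. A minimal sketch, assuming ctrlr is an already-attached controller and with error handling omitted:

#include "spdk/nvme.h"

static struct spdk_nvme_qpair *
alloc_batched_qpair(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_io_qpair_opts opts;

	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	/* Defer SQ doorbell writes until the next spdk_nvme_qpair_process_completions() call. */
	opts.delay_cmd_submit = true;
	return spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
}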
+} + +static void +nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair) +{ + uint64_t t02; + struct nvme_tracker *tr, *tmp; + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct spdk_nvme_ctrlr_process *active_proc; + + /* Don't check timeouts during controller initialization. */ + if (ctrlr->state != NVME_CTRLR_STATE_READY) { + return; + } + + if (nvme_qpair_is_admin_queue(qpair)) { + active_proc = nvme_ctrlr_get_current_process(ctrlr); + } else { + active_proc = qpair->active_proc; + } + + /* Only check timeouts if the current process has a timeout callback. */ + if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { + return; + } + + t02 = spdk_get_ticks(); + TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) { + assert(tr->req != NULL); + + if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, t02)) { + /* + * The requests are in order, so as soon as one has not timed out, + * stop iterating. + */ + break; + } + } +} + +int32_t +nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr; + struct spdk_nvme_cpl *cpl, *next_cpl; + uint32_t num_completions = 0; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + uint16_t next_cq_head; + uint8_t next_phase; + bool next_is_valid = false; + + if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + } + + if (max_completions == 0 || max_completions > pqpair->max_completions_cap) { + /* + * max_completions == 0 means unlimited, but complete at most + * max_completions_cap batch of I/O at a time so that the completion + * queue doorbells don't wrap around. + */ + max_completions = pqpair->max_completions_cap; + } + + while (1) { + cpl = &pqpair->cpl[pqpair->cq_head]; + + if (!next_is_valid && cpl->status.p != pqpair->flags.phase) { + break; + } + + if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) { + next_cq_head = pqpair->cq_head + 1; + next_phase = pqpair->flags.phase; + } else { + next_cq_head = 0; + next_phase = !pqpair->flags.phase; + } + next_cpl = &pqpair->cpl[next_cq_head]; + next_is_valid = (next_cpl->status.p == next_phase); + if (next_is_valid) { + __builtin_prefetch(&pqpair->tr[next_cpl->cid]); + } + +#ifdef __PPC64__ + /* + * This memory barrier prevents reordering of: + * - load after store from/to tr + * - load after load cpl phase and cpl cid + */ + spdk_mb(); +#elif defined(__aarch64__) + __asm volatile("dmb oshld" ::: "memory"); +#endif + + if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) { + pqpair->cq_head = 0; + pqpair->flags.phase = !pqpair->flags.phase; + } + + tr = &pqpair->tr[cpl->cid]; + /* Prefetch the req's STAILQ_ENTRY since we'll need to access it + * as part of putting the req back on the qpair's free list. 
+ */ + __builtin_prefetch(&tr->req->stailq); + pqpair->sq_head = cpl->sqhd; + + if (tr->req) { + nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true); + } else { + SPDK_ERRLOG("cpl does not map to outstanding cmd\n"); + spdk_nvme_qpair_print_completion(qpair, cpl); + assert(0); + } + + if (++num_completions == max_completions) { + break; + } + } + + if (num_completions > 0) { + nvme_pcie_qpair_ring_cq_doorbell(qpair); + } + + if (pqpair->flags.delay_cmd_submit) { + if (pqpair->last_sq_tail != pqpair->sq_tail) { + nvme_pcie_qpair_ring_sq_doorbell(qpair); + pqpair->last_sq_tail = pqpair->sq_tail; + } + } + + if (spdk_unlikely(ctrlr->timeout_enabled)) { + /* + * User registered for timeout callback + */ + nvme_pcie_qpair_check_timeout(qpair); + } + + /* Before returning, complete any pending admin request. */ + if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { + nvme_pcie_qpair_complete_pending_admin_request(qpair); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + } + + return num_completions; +} + +int +nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + + if (nvme_qpair_is_admin_queue(qpair)) { + nvme_pcie_admin_qpair_destroy(qpair); + } + /* + * We check sq_vaddr and cq_vaddr to see if the user specified the memory + * buffers when creating the I/O queue. + * If the user specified them, we cannot free that memory. + * Nor do we free it if it's in the CMB. + */ + if (!pqpair->sq_vaddr && pqpair->cmd && !pqpair->sq_in_cmb) { + spdk_free(pqpair->cmd); + } + if (!pqpair->cq_vaddr && pqpair->cpl) { + spdk_free(pqpair->cpl); + } + if (pqpair->tr) { + spdk_free(pqpair->tr); + } + + nvme_qpair_deinit(qpair); + + spdk_free(pqpair); + + return 0; +} + +struct spdk_nvme_qpair * +nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + const struct spdk_nvme_io_qpair_opts *opts) +{ + struct nvme_pcie_qpair *pqpair; + struct spdk_nvme_qpair *qpair; + int rc; + + assert(ctrlr != NULL); + + pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (pqpair == NULL) { + return NULL; + } + + pqpair->num_entries = opts->io_queue_size; + pqpair->flags.delay_cmd_submit = opts->delay_cmd_submit; + + qpair = &pqpair->qpair; + + rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests); + if (rc != 0) { + nvme_pcie_qpair_destroy(qpair); + return NULL; + } + + rc = nvme_pcie_qpair_construct(qpair, opts); + + if (rc != 0) { + nvme_pcie_qpair_destroy(qpair); + return NULL; + } + + return qpair; +} + +int +nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + struct nvme_completion_poll_status *status; + int rc; + + assert(ctrlr != NULL); + + if (ctrlr->is_removed) { + goto free; + } + + status = calloc(1, sizeof(*status)); + if (!status) { + SPDK_ERRLOG("Failed to allocate status tracker\n"); + return -ENOMEM; + } + + /* Delete the I/O submission queue */ + rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, status); + if (rc != 0) { + SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc); + free(status); + return rc; + } + if (nvme_wait_for_completion(ctrlr->adminq, status)) { + if (!status->timed_out) { + free(status); + } + return -1; + } + + /* Now that the submission queue is deleted, the device is supposed to have + * completed any outstanding I/O. Try to complete them. If they don't complete, + * they'll be marked as aborted and completed below. 
*/ + nvme_pcie_qpair_process_completions(qpair, 0); + + memset(status, 0, sizeof(*status)); + /* Delete the completion queue */ + rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status); + if (rc != 0) { + SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc); + free(status); + return rc; + } + if (nvme_wait_for_completion(ctrlr->adminq, status)) { + if (!status->timed_out) { + free(status); + } + return -1; + } + free(status); + +free: + if (qpair->no_deletion_notification_needed == 0) { + /* Abort the rest of the I/O */ + nvme_pcie_qpair_abort_trackers(qpair, 1); + } + + nvme_pcie_qpair_destroy(qpair); + return 0; +} + struct spdk_nvme_transport_poll_group * nvme_pcie_poll_group_create(void) { diff --git a/lib/nvme/nvme_pcie_internal.h b/lib/nvme/nvme_pcie_internal.h index 01be5b330..215cd02df 100644 --- a/lib/nvme/nvme_pcie_internal.h +++ b/lib/nvme/nvme_pcie_internal.h @@ -87,6 +87,8 @@ struct nvme_pcie_ctrlr { volatile uint32_t *doorbell_base; }; +extern __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr; + struct nvme_tracker { TAILQ_ENTRY(nvme_tracker) tq_list; @@ -203,6 +205,88 @@ nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr) return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr); } +static inline int +nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old); +} + +static inline bool +nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value, + volatile uint32_t *shadow_db, + volatile uint32_t *eventidx) +{ + uint16_t old; + + if (!shadow_db) { + return true; + } + + old = *shadow_db; + *shadow_db = value; + + /* + * Ensure that the doorbell is updated before reading the EventIdx from + * memory + */ + spdk_mb(); + + if (!nvme_pcie_qpair_need_event(*eventidx, value, old)) { + return false; + } + + return true; +} + +static inline void +nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr); + bool need_mmio = true; + + if (qpair->first_fused_submitted) { + /* This is first cmd of two fused commands - don't ring doorbell */ + qpair->first_fused_submitted = 0; + return; + } + + if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) { + need_mmio = nvme_pcie_qpair_update_mmio_required(qpair, + pqpair->sq_tail, + pqpair->shadow_doorbell.sq_tdbl, + pqpair->shadow_doorbell.sq_eventidx); + } + + if (spdk_likely(need_mmio)) { + spdk_wmb(); + g_thread_mmio_ctrlr = pctrlr; + spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail); + g_thread_mmio_ctrlr = NULL; + } +} + +static inline void +nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr); + bool need_mmio = true; + + if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) { + need_mmio = nvme_pcie_qpair_update_mmio_required(qpair, + pqpair->cq_head, + pqpair->shadow_doorbell.cq_hdbl, + pqpair->shadow_doorbell.cq_eventidx); + } + + if (spdk_likely(need_mmio)) { + g_thread_mmio_ctrlr = pctrlr; + spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head); + g_thread_mmio_ctrlr = NULL; + } +} + int nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair); int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair, const struct spdk_nvme_io_qpair_opts *opts); @@ -221,6 +305,22 @@ int 
nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_ spdk_nvme_cmd_cb cb_fn, void *cb_arg); int nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); void nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); +void nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr); +void nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair, + struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr, + bool print_on_error); +void nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, + struct spdk_nvme_cpl *cpl, bool print_on_error); +void nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr); +void nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair); +void nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair); +void nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); +int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, + uint32_t max_completions); +int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair); +struct spdk_nvme_qpair *nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + const struct spdk_nvme_io_qpair_opts *opts); +int nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); struct spdk_nvme_transport_poll_group *nvme_pcie_poll_group_create(void); int nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair); diff --git a/lib/nvme/nvme_vfio_user.c b/lib/nvme/nvme_vfio_user.c index d137ff892..4a5fc5aeb 100644 --- a/lib/nvme/nvme_vfio_user.c +++ b/lib/nvme/nvme_vfio_user.c @@ -354,16 +354,13 @@ nvme_vfio_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) return 0; } -static int -nvme_vfio_qpair_destroy(struct spdk_nvme_qpair *qpair); - static int nvme_vfio_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) { struct nvme_vfio_ctrlr *vctrlr = nvme_vfio_ctrlr(ctrlr); if (ctrlr->adminq) { - nvme_vfio_qpair_destroy(ctrlr->adminq); + nvme_pcie_qpair_destroy(ctrlr->adminq); } nvme_ctrlr_destruct_finish(ctrlr); @@ -389,294 +386,6 @@ nvme_vfio_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) return NVME_MAX_SGES; } -static struct spdk_nvme_qpair * -nvme_vfio_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, - const struct spdk_nvme_io_qpair_opts *opts) -{ - struct nvme_pcie_qpair *vqpair; - struct spdk_nvme_qpair *qpair; - int rc; - - assert(ctrlr != NULL); - - vqpair = spdk_zmalloc(sizeof(*vqpair), 64, NULL, - SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); - if (vqpair == NULL) { - return NULL; - } - - vqpair->num_entries = opts->io_queue_size; - vqpair->flags.delay_cmd_submit = opts->delay_cmd_submit; - - qpair = &vqpair->qpair; - - rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests); - if (rc != 0) { - nvme_vfio_qpair_destroy(qpair); - return NULL; - } - - rc = nvme_pcie_qpair_construct(qpair, opts); - - if (rc != 0) { - nvme_vfio_qpair_destroy(qpair); - return NULL; - } - - return qpair; -} - -static void -nvme_vfio_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr); - -static int -nvme_vfio_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) -{ - struct nvme_completion_poll_status *status; - int rc; - - assert(ctrlr != NULL); - - if (ctrlr->is_removed) { - goto free; - } - - status = calloc(1, sizeof(*status)); - if (!status) { - SPDK_ERRLOG("Failed to allocate status 
tracker\n"); - return -ENOMEM; - } - - /* Delete the I/O submission queue */ - rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, status); - if (rc != 0) { - SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc); - free(status); - return rc; - } - if (nvme_wait_for_completion(ctrlr->adminq, status)) { - if (!status->timed_out) { - free(status); - } - return -1; - } - - memset(status, 0, sizeof(*status)); - /* Delete the completion queue */ - rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status); - if (rc != 0) { - SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc); - free(status); - return rc; - } - if (nvme_wait_for_completion(ctrlr->adminq, status)) { - if (!status->timed_out) { - free(status); - } - return -1; - } - free(status); - -free: - if (qpair->no_deletion_notification_needed == 0) { - /* Abort the rest of the I/O */ - nvme_vfio_qpair_abort_trackers(qpair, 1); - } - - nvme_vfio_qpair_destroy(qpair); - return 0; -} - -static inline void -nvme_vfio_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair); - - if (qpair->first_fused_submitted) { - /* This is first cmd of two fused commands - don't ring doorbell */ - qpair->first_fused_submitted = 0; - return; - } - - spdk_wmb(); - spdk_mmio_write_4(vqpair->sq_tdbl, vqpair->sq_tail); -} - -static inline void -nvme_vfio_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair); - - spdk_mmio_write_4(vqpair->cq_hdbl, vqpair->cq_head); -} - -static void -nvme_vfio_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) -{ - struct nvme_request *req; - struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair); - - req = tr->req; - assert(req != NULL); - - if (req->cmd.fuse == SPDK_NVME_IO_FLAGS_FUSE_FIRST) { - /* This is first cmd of two fused commands - don't ring doorbell */ - qpair->first_fused_submitted = 1; - } - - vqpair->cmd[vqpair->sq_tail] = req->cmd; - - if (spdk_unlikely(++vqpair->sq_tail == vqpair->num_entries)) { - vqpair->sq_tail = 0; - } - - if (spdk_unlikely(vqpair->sq_tail == vqpair->sq_head)) { - SPDK_ERRLOG("sq_tail is passing sq_head!\n"); - } - - nvme_vfio_qpair_ring_sq_doorbell(qpair); -} - -static void -nvme_vfio_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, - struct spdk_nvme_cpl *cpl, bool print_on_error) -{ - struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair); - struct nvme_request *req; - bool retry, error; - bool req_from_current_proc = true; - - req = tr->req; - - assert(req != NULL); - - error = spdk_nvme_cpl_is_error(cpl); - retry = error && nvme_completion_is_retry(cpl) && - req->retries < vqpair->retry_count; - - if (error && print_on_error && !qpair->ctrlr->opts.disable_error_logging) { - spdk_nvme_qpair_print_command(qpair, &req->cmd); - spdk_nvme_qpair_print_completion(qpair, cpl); - } - - assert(cpl->cid == req->cmd.cid); - - if (retry) { - req->retries++; - nvme_vfio_qpair_submit_tracker(qpair, tr); - } else { - /* Only check admin requests from different processes. 
*/ - if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) { - req_from_current_proc = false; - nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl); - } else { - nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl); - } - - if (req_from_current_proc == true) { - nvme_qpair_free_request(qpair, req); - } - - tr->req = NULL; - - TAILQ_REMOVE(&vqpair->outstanding_tr, tr, tq_list); - TAILQ_INSERT_HEAD(&vqpair->free_tr, tr, tq_list); - } -} - -static void -nvme_vfio_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair, - struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr, - bool print_on_error) -{ - struct spdk_nvme_cpl cpl; - - memset(&cpl, 0, sizeof(cpl)); - cpl.sqid = qpair->id; - cpl.cid = tr->cid; - cpl.status.sct = sct; - cpl.status.sc = sc; - cpl.status.dnr = dnr; - nvme_vfio_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); -} - -static void -nvme_vfio_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) -{ - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_tracker *tr, *temp, *last; - - last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head); - - /* Abort previously submitted (outstanding) trs */ - TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) { - if (!qpair->ctrlr->opts.disable_error_logging) { - SPDK_ERRLOG("aborting outstanding command\n"); - } - nvme_vfio_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, - SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true); - - if (tr == last) { - break; - } - } -} - -static void -nvme_vfio_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) -{ - nvme_vfio_qpair_abort_trackers(qpair, dnr); -} - -static void -nvme_vfio_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair); - struct nvme_tracker *tr; - - tr = TAILQ_FIRST(&vqpair->outstanding_tr); - while (tr != NULL) { - assert(tr->req != NULL); - if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { - nvme_vfio_qpair_manual_complete_tracker(qpair, tr, - SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0, - false); - tr = TAILQ_FIRST(&vqpair->outstanding_tr); - } else { - tr = TAILQ_NEXT(tr, tq_list); - } - } -} - -static void -nvme_vfio_admin_qpair_destroy(struct spdk_nvme_qpair *qpair) -{ - nvme_vfio_admin_qpair_abort_aers(qpair); -} - -static int -nvme_vfio_qpair_destroy(struct spdk_nvme_qpair *qpair) -{ - struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair); - - if (nvme_qpair_is_admin_queue(qpair)) { - nvme_vfio_admin_qpair_destroy(qpair); - } - - spdk_free(vqpair->cmd); - spdk_free(vqpair->cpl); - - if (vqpair->tr) { - spdk_free(vqpair->tr); - } - - nvme_qpair_deinit(qpair); - - spdk_free(vqpair); - - return 0; -} - static inline int nvme_vfio_prp_list_append(struct nvme_tracker *tr, uint32_t *prp_index, void *virt_addr, size_t len, uint32_t page_size) @@ -755,7 +464,7 @@ nvme_vfio_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_ rc = nvme_vfio_prp_list_append(tr, &prp_index, req->payload.contig_or_cb_arg + req->payload_offset, req->payload_size, qpair->ctrlr->page_size); if (rc) { - nvme_vfio_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_INVALID_FIELD, 1 /* do not retry */, true); } @@ -797,7 +506,7 @@ nvme_vfio_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_reques } } - nvme_vfio_qpair_submit_tracker(qpair, tr); + 
nvme_pcie_qpair_submit_tracker(qpair, tr); exit: if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { @@ -807,97 +516,6 @@ exit: return rc; } -static int32_t -nvme_vfio_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) -{ - struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair); - struct nvme_tracker *tr; - struct spdk_nvme_cpl *cpl, *next_cpl; - uint32_t num_completions = 0; - struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; - uint16_t next_cq_head; - uint8_t next_phase; - bool next_is_valid = false; - - if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { - nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); - } - - if (max_completions == 0 || max_completions > vqpair->max_completions_cap) { - /* - * max_completions == 0 means unlimited, but complete at most - * max_completions_cap batch of I/O at a time so that the completion - * queue doorbells don't wrap around. - */ - max_completions = vqpair->max_completions_cap; - } - - while (1) { - cpl = &vqpair->cpl[vqpair->cq_head]; - - if (!next_is_valid && cpl->status.p != vqpair->flags.phase) { - break; - } - - if (spdk_likely(vqpair->cq_head + 1 != vqpair->num_entries)) { - next_cq_head = vqpair->cq_head + 1; - next_phase = vqpair->flags.phase; - } else { - next_cq_head = 0; - next_phase = !vqpair->flags.phase; - } - next_cpl = &vqpair->cpl[next_cq_head]; - next_is_valid = (next_cpl->status.p == next_phase); - if (next_is_valid) { - __builtin_prefetch(&vqpair->tr[next_cpl->cid]); - } - - if (spdk_unlikely(++vqpair->cq_head == vqpair->num_entries)) { - vqpair->cq_head = 0; - vqpair->flags.phase = !vqpair->flags.phase; - } - - tr = &vqpair->tr[cpl->cid]; - /* Prefetch the req's STAILQ_ENTRY since we'll need to access it - * as part of putting the req back on the qpair's free list. - */ - __builtin_prefetch(&tr->req->stailq); - vqpair->sq_head = cpl->sqhd; - - if (tr->req) { - nvme_vfio_qpair_complete_tracker(qpair, tr, cpl, true); - } else { - SPDK_ERRLOG("cpl does not map to outstanding cmd\n"); - spdk_nvme_qpair_print_completion(qpair, cpl); - assert(0); - } - - if (++num_completions == max_completions) { - break; - } - } - - if (num_completions > 0) { - nvme_vfio_qpair_ring_cq_doorbell(qpair); - } - - if (vqpair->flags.delay_cmd_submit) { - if (vqpair->last_sq_tail != vqpair->sq_tail) { - nvme_vfio_qpair_ring_sq_doorbell(qpair); - vqpair->last_sq_tail = vqpair->sq_tail; - } - } - - /* Before returning, complete any pending admin request. 
*/ - if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { - nvme_pcie_qpair_complete_pending_admin_request(qpair); - - nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); - } - - return num_completions; -} - const struct spdk_nvme_transport_ops vfio_ops = { .name = "VFIOUSER", .type = SPDK_NVME_TRANSPORT_VFIOUSER, @@ -914,16 +532,16 @@ const struct spdk_nvme_transport_ops vfio_ops = { .ctrlr_get_max_xfer_size = nvme_vfio_ctrlr_get_max_xfer_size, .ctrlr_get_max_sges = nvme_vfio_ctrlr_get_max_sges, - .ctrlr_create_io_qpair = nvme_vfio_ctrlr_create_io_qpair, - .ctrlr_delete_io_qpair = nvme_vfio_ctrlr_delete_io_qpair, + .ctrlr_create_io_qpair = nvme_pcie_ctrlr_create_io_qpair, + .ctrlr_delete_io_qpair = nvme_pcie_ctrlr_delete_io_qpair, .ctrlr_connect_qpair = nvme_pcie_ctrlr_connect_qpair, .ctrlr_disconnect_qpair = nvme_pcie_ctrlr_disconnect_qpair, - .admin_qpair_abort_aers = nvme_vfio_admin_qpair_abort_aers, + .admin_qpair_abort_aers = nvme_pcie_admin_qpair_abort_aers, .qpair_reset = nvme_pcie_qpair_reset, - .qpair_abort_reqs = nvme_vfio_qpair_abort_reqs, + .qpair_abort_reqs = nvme_pcie_qpair_abort_reqs, .qpair_submit_request = nvme_vfio_qpair_submit_request, - .qpair_process_completions = nvme_vfio_qpair_process_completions, + .qpair_process_completions = nvme_pcie_qpair_process_completions, .poll_group_create = nvme_pcie_poll_group_create, .poll_group_connect_qpair = nvme_pcie_poll_group_connect_qpair,
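The net effect of the patch is that the PCIe and vfio-user transports now share one copy of the queue-pair fast path and teardown code; only request submission stays transport specific, since vfio-user builds PRP lists with its own address translation. As an illustrative sketch (not part of the patch; the my_* names are hypothetical placeholders), a future PCIe-style transport would wire the shared helpers into its ops table the same way vfio_ops does above:

/* Hypothetical ops table for a new PCIe-like transport. Every nvme_pcie_* symbol
 * below is one of the helpers made common by this patch; my_* symbols stand in
 * for the transport-specific pieces that cannot be shared. */
const struct spdk_nvme_transport_ops my_pcie_like_ops = {
	.name = "MYPCIE",
	.type = SPDK_NVME_TRANSPORT_CUSTOM,

	.ctrlr_create_io_qpair = nvme_pcie_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_pcie_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_pcie_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_pcie_ctrlr_disconnect_qpair,
	.admin_qpair_abort_aers = nvme_pcie_admin_qpair_abort_aers,
	.qpair_reset = nvme_pcie_qpair_reset,
	.qpair_abort_reqs = nvme_pcie_qpair_abort_reqs,
	.qpair_process_completions = nvme_pcie_qpair_process_completions,
	.poll_group_create = nvme_pcie_poll_group_create,
	.poll_group_connect_qpair = nvme_pcie_poll_group_connect_qpair,

	/* Address translation differs per transport, so command submission
	 * (PRP/SGL building) stays local, just as nvme_vfio_qpair_submit_request
	 * does in this patch. */
	.qpair_submit_request = my_pcie_like_qpair_submit_request,
};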