nvme/pcie: move the common IO path APIs to nvme_pcie_common.c

The data paths for the PCIe and vfio-user transports are almost the
same too, so move the code from nvme_pcie.c to nvme_pcie_common.c
so that these APIs can be reused by vfio_user.

No logic change for this patch.

Change-Id: I82f480bba3bae0ce35e2a98f29839081095f7d50
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6040
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: <dongx.yi@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
This commit is contained in:
Changpeng Liu 2021-01-22 21:03:01 +08:00 committed by Tomasz Zawadzki
parent 613f82f547
commit 8b203d1994
4 changed files with 578 additions and 945 deletions

View File

@ -52,9 +52,7 @@ struct nvme_pcie_enum_ctx {
static int nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx,
struct spdk_pci_addr *pci_addr);
static int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair);
__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL;
static uint16_t g_signal_lock;
static bool g_sigset = false;
@ -755,245 +753,6 @@ nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
return 0;
}
/* Copy one NVMe command into a submission queue slot using only 8-byte stores.
 *
 * Used when dst points to MMIO (i.e. CMB) in a virtual machine: QEMU will not
 * emulate wide instructions that target MMIO space, so the command is moved
 * one 64-bit word at a time.
 */
static inline void
nvme_pcie_copy_command_mmio(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
{
	const uint64_t *from = (const uint64_t *)src;
	uint64_t *to = (uint64_t *)dst;
	uint32_t remaining = sizeof(*dst) / sizeof(uint64_t);

	while (remaining > 0) {
		*to++ = *from++;
		remaining--;
	}
}
/* Copy one NVMe command from src into the submission queue entry dst.
 *
 * dst and src are known to be non-overlapping and 64-byte aligned. With SSE2
 * the copy is done as four 16-byte aligned loads, each followed by a streaming
 * store; without SSE2 a plain structure assignment is used.
 */
static inline void
nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
{
#if !defined(__SSE2__)
	*dst = *src;
#else
	const __m128i *in = (const __m128i *)src;
	__m128i *out = (__m128i *)dst;
	uint32_t i;

	/* Four 16-byte pieces, 64 bytes in total. */
	for (i = 0; i < 4; i++) {
		_mm_stream_si128(&out[i], _mm_load_si128(&in[i]));
	}
#endif
}
/* Decide whether a doorbell index update has to be signalled.
 *
 * Both distances are computed modulo 2^16. Returns non-zero when the distance
 * from event_idx to new_idx is no greater than the distance from old to
 * new_idx, and 0 otherwise.
 */
static inline int
nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	const uint16_t since_event = (uint16_t)(new_idx - event_idx);
	const uint16_t since_old = (uint16_t)(new_idx - old);

	return since_event <= since_old;
}
/* Store a new doorbell value in the shadow doorbell and report whether the
 * real MMIO doorbell still has to be written.
 *
 * Returns true when shadow_db is NULL or when nvme_pcie_qpair_need_event()
 * reports that the update must be signalled; returns false otherwise.
 * The qpair argument is not used by this function.
 */
static bool
nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value,
				     volatile uint32_t *shadow_db,
				     volatile uint32_t *eventidx)
{
	uint16_t prev;

	if (shadow_db == NULL) {
		return true;
	}

	prev = *shadow_db;
	*shadow_db = value;

	/* The shadow doorbell store has to be done before EventIdx is read from memory. */
	spdk_mb();

	return nvme_pcie_qpair_need_event(*eventidx, value, prev) != 0;
}
/* Publish the current submission queue tail to the controller.
 *
 * Nothing is written when the previous submission was the first command of a
 * fused pair (the flag is cleared instead), or when a shadow doorbell is in use
 * and nvme_pcie_qpair_update_mmio_required() says no MMIO write is needed.
 */
static inline void
nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	bool write_doorbell = true;

	if (qpair->first_fused_submitted) {
		/* First cmd of two fused commands - hold the doorbell back. */
		qpair->first_fused_submitted = 0;
		return;
	}

	if (spdk_unlikely(pq->flags.has_shadow_doorbell)) {
		write_doorbell = nvme_pcie_qpair_update_mmio_required(qpair,
				 pq->sq_tail,
				 pq->shadow_doorbell.sq_tdbl,
				 pq->shadow_doorbell.sq_eventidx);
	}

	if (spdk_unlikely(!write_doorbell)) {
		return;
	}

	spdk_wmb();
	/* The thread-local controller pointer is set only for the duration of the MMIO write. */
	g_thread_mmio_ctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	spdk_mmio_write_4(pq->sq_tdbl, pq->sq_tail);
	g_thread_mmio_ctrlr = NULL;
}
/* Publish the current completion queue head to the controller.
 *
 * When a shadow doorbell is in use, the MMIO write is skipped if
 * nvme_pcie_qpair_update_mmio_required() says it is not needed.
 */
static inline void
nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	bool write_doorbell = true;

	if (spdk_unlikely(pq->flags.has_shadow_doorbell)) {
		write_doorbell = nvme_pcie_qpair_update_mmio_required(qpair,
				 pq->cq_head,
				 pq->shadow_doorbell.cq_hdbl,
				 pq->shadow_doorbell.cq_eventidx);
	}

	if (spdk_unlikely(!write_doorbell)) {
		return;
	}

	/* The thread-local controller pointer is set only for the duration of the MMIO write. */
	g_thread_mmio_ctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	spdk_mmio_write_4(pq->cq_hdbl, pq->cq_head);
	g_thread_mmio_ctrlr = NULL;
}
/* Copy the command of tracker tr into the next submission queue slot and
 * advance the tail, wrapping at num_entries.
 *
 * The SQ doorbell is rung immediately unless the queue pair has
 * delay_cmd_submit set. For the first command of a fused pair,
 * first_fused_submitted is set so that the doorbell is held back.
 */
static void
nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct nvme_request *req = tr->req;
	struct spdk_nvme_cmd *slot;
	bool narrow_copy;

	assert(req != NULL);

	if (req->cmd.fuse == SPDK_NVME_IO_FLAGS_FUSE_FIRST) {
		/* First cmd of two fused commands - the doorbell must not be rung yet. */
		qpair->first_fused_submitted = 1;
	}

	slot = &pq->cmd[pq->sq_tail];

	/* QEMU's virtual NVMe controller limits a single access to 8 bytes, so wide
	 * instructions cannot be used when the quirk is set and the SQ lives in the CMB.
	 */
	narrow_copy = (qpair->ctrlr->quirks & NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH) && pq->sq_in_cmb;
	if (spdk_unlikely(narrow_copy)) {
		nvme_pcie_copy_command_mmio(slot, &req->cmd);
	} else {
		nvme_pcie_copy_command(slot, &req->cmd);
	}

	pq->sq_tail++;
	if (spdk_unlikely(pq->sq_tail == pq->num_entries)) {
		pq->sq_tail = 0;
	}

	if (spdk_unlikely(pq->sq_tail == pq->sq_head)) {
		SPDK_ERRLOG("sq_tail is passing sq_head!\n");
	}

	if (pq->flags.delay_cmd_submit) {
		return;
	}

	nvme_pcie_qpair_ring_sq_doorbell(qpair);
}
/* Finish processing of tracker tr with completion cpl.
 *
 * A failed command that nvme_completion_is_retry() accepts and that is still
 * below the queue pair's retry_count is resubmitted. Otherwise the tracker is
 * taken off the outstanding list, the request is either completed and freed
 * here or - for an admin request whose pid differs from the caller's - queued
 * as a pending admin request, and the tracker is put at the head of the free
 * list.
 *
 * print_on_error: print the command and completion when cpl is an error and
 * error logging is not disabled in the controller options.
 */
static void
nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
				 struct spdk_nvme_cpl *cpl, bool print_on_error)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct nvme_request *req = tr->req;
	bool failed, resubmit;

	assert(req != NULL);

	failed = spdk_nvme_cpl_is_error(cpl);
	resubmit = failed && nvme_completion_is_retry(cpl) &&
		   req->retries < pq->retry_count;

	if (failed && print_on_error && !qpair->ctrlr->opts.disable_error_logging) {
		spdk_nvme_qpair_print_command(qpair, &req->cmd);
		spdk_nvme_qpair_print_completion(qpair, cpl);
	}

	assert(cpl->cid == req->cmd.cid);

	if (resubmit) {
		req->retries++;
		nvme_pcie_qpair_submit_tracker(qpair, tr);
		return;
	}

	TAILQ_REMOVE(&pq->outstanding_tr, tr, tq_list);

	/* Only admin requests are checked for coming from a different process. */
	if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) {
		nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl);
	} else {
		nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl);
		nvme_qpair_free_request(qpair, req);
	}

	tr->req = NULL;
	TAILQ_INSERT_HEAD(&pq->free_tr, tr, tq_list);
}
static void
nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
bool print_on_error)
{
struct spdk_nvme_cpl cpl;
memset(&cpl, 0, sizeof(cpl));
cpl.sqid = qpair->id;
cpl.cid = tr->cid;
cpl.status.sct = sct;
cpl.status.sc = sc;
cpl.status.dnr = dnr;
nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}
static void
nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_tracker *tr, *temp, *last;
last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head);
/* Abort previously submitted (outstanding) trs */
TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) {
if (!qpair->ctrlr->opts.disable_error_logging) {
SPDK_ERRLOG("aborting outstanding command\n");
}
nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true);
if (tr == last) {
break;
}
}
}
static int
nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
int (*iter_fn)(struct nvme_request *req, void *arg),
@ -1017,172 +776,6 @@ nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
return 0;
}
/* Abort every outstanding Asynchronous Event Request on the queue pair with
 * status GENERIC / ABORTED_SQ_DELETION, without error printing.
 *
 * After each abort the scan restarts from the head of the outstanding list.
 */
static void
nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct nvme_tracker *cur = TAILQ_FIRST(&pq->outstanding_tr);

	while (cur != NULL) {
		assert(cur->req != NULL);

		if (cur->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			cur = TAILQ_NEXT(cur, tq_list);
			continue;
		}

		nvme_pcie_qpair_manual_complete_tracker(qpair, cur,
							SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0,
							false);
		cur = TAILQ_FIRST(&pq->outstanding_tr);
	}
}
/* Admin-queue specific part of queue pair destruction: abort the outstanding
 * Asynchronous Event Requests.
 */
static void
nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	nvme_pcie_admin_qpair_abort_aers(qpair);
}
/* Release the host-side resources of a queue pair and free the queue pair.
 *
 * For the admin queue the outstanding AERs are aborted first. Always returns 0.
 */
static int
nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	bool sq_freeable, cq_freeable;

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_pcie_admin_qpair_destroy(qpair);
	}

	/*
	 * sq_vaddr and cq_vaddr tell whether the user supplied the memory
	 * buffers when creating the I/O queue. User-supplied memory is not
	 * freed here, and neither is a submission queue placed in the CMB.
	 */
	sq_freeable = !pq->sq_vaddr && !pq->sq_in_cmb;
	cq_freeable = !pq->cq_vaddr;

	if (sq_freeable && pq->cmd != NULL) {
		spdk_free(pq->cmd);
	}
	if (cq_freeable && pq->cpl != NULL) {
		spdk_free(pq->cpl);
	}
	if (pq->tr != NULL) {
		spdk_free(pq->tr);
	}

	nvme_qpair_deinit(qpair);
	spdk_free(pq);

	return 0;
}
/* Abort the requests of a queue pair by aborting its outstanding trackers
 * with the given dnr value.
 */
static void
nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	nvme_pcie_qpair_abort_trackers(qpair, dnr);
}
/* Allocate and set up an I/O queue pair with id qid on ctrlr.
 *
 * Queue size and delayed-submission mode are taken from opts. Returns the new
 * queue pair, or NULL if the allocation, nvme_qpair_init() or
 * nvme_pcie_qpair_construct() fails; after a failed init/construct the
 * partially built queue pair is destroyed.
 */
static struct spdk_nvme_qpair *
nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
				const struct spdk_nvme_io_qpair_opts *opts)
{
	struct nvme_pcie_qpair *pq;
	struct spdk_nvme_qpair *qpair;
	int rc;

	assert(ctrlr != NULL);

	pq = spdk_zmalloc(sizeof(*pq), 64, NULL,
			  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pq == NULL) {
		return NULL;
	}

	pq->num_entries = opts->io_queue_size;
	pq->flags.delay_cmd_submit = opts->delay_cmd_submit;
	qpair = &pq->qpair;

	rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests);
	if (rc == 0) {
		rc = nvme_pcie_qpair_construct(qpair, opts);
	}
	if (rc != 0) {
		nvme_pcie_qpair_destroy(qpair);
		return NULL;
	}

	return qpair;
}
static int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair,
uint32_t max_completions);
/* Delete an I/O queue pair on the controller and release its host resources.
 *
 * Unless the controller is marked removed, the submission queue and then the
 * completion queue are deleted with admin commands, each waited for
 * synchronously; completions for the queue pair are processed in between.
 * Afterwards the remaining trackers are aborted (unless
 * no_deletion_notification_needed is set) and the queue pair is destroyed.
 *
 * Returns 0 on success, -ENOMEM if the status tracker cannot be allocated, the
 * submit error if a delete command cannot be sent, or -1 if waiting for a
 * delete command fails. On those error returns the queue pair is not destroyed.
 */
static int
nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_completion_poll_status *poll_status;
	int rc;

	assert(ctrlr != NULL);

	if (ctrlr->is_removed) {
		goto teardown;
	}

	poll_status = calloc(1, sizeof(*poll_status));
	if (poll_status == NULL) {
		SPDK_ERRLOG("Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	/* Step 1: delete the I/O submission queue. */
	rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, poll_status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc);
		free(poll_status);
		return rc;
	}
	if (nvme_wait_for_completion(ctrlr->adminq, poll_status) != 0) {
		/* NOTE(review): a timed-out status is not freed here; presumably it is
		 * released elsewhere once the command finally completes - confirm.
		 */
		if (!poll_status->timed_out) {
			free(poll_status);
		}
		return -1;
	}

	/* With the submission queue deleted, the device is supposed to have
	 * completed any outstanding I/O. Try to reap them; whatever does not
	 * complete is aborted further down.
	 */
	nvme_pcie_qpair_process_completions(qpair, 0);

	/* Step 2: delete the completion queue, reusing the status tracker. */
	memset(poll_status, 0, sizeof(*poll_status));
	rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, poll_status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
		free(poll_status);
		return rc;
	}
	if (nvme_wait_for_completion(ctrlr->adminq, poll_status) != 0) {
		if (!poll_status->timed_out) {
			free(poll_status);
		}
		return -1;
	}
	free(poll_status);

teardown:
	if (qpair->no_deletion_notification_needed == 0) {
		/* Abort the rest of the I/O */
		nvme_pcie_qpair_abort_trackers(qpair, 1);
	}

	nvme_pcie_qpair_destroy(qpair);
	return 0;
}
static void
nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
@ -1695,154 +1288,6 @@ exit:
return rc;
}
/* Run the timeout check over the outstanding trackers of a queue pair.
 *
 * Nothing is done unless the controller is in the READY state and the relevant
 * process (the current process for the admin queue, the queue pair's
 * active_proc otherwise) has a timeout callback registered.
 */
static void
nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_ctrlr_process *proc;
	struct nvme_tracker *cur, *following;
	uint64_t now;

	/* Timeouts are not checked during controller initialization. */
	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	proc = nvme_qpair_is_admin_queue(qpair) ?
	       nvme_ctrlr_get_current_process(ctrlr) : qpair->active_proc;

	/* Without a process or a timeout callback there is nothing to report to. */
	if (proc == NULL || proc->timeout_cb_fn == NULL) {
		return;
	}

	now = spdk_get_ticks();
	TAILQ_FOREACH_SAFE(cur, &pq->outstanding_tr, tq_list, following) {
		assert(cur->req != NULL);

		/*
		 * The requests are in order, so the scan ends at the first one
		 * that has not timed out.
		 */
		if (nvme_request_check_timeout(cur->req, cur->cid, proc, now)) {
			break;
		}
	}
}
/*
 * Process completion queue entries for a queue pair.
 *
 * Entries are consumed starting at cq_head for as long as their phase bit
 * matches the queue pair's current phase, up to max_completions entries
 * (0, or anything above max_completions_cap, is clamped to max_completions_cap).
 * Each entry's tracker is looked up by command identifier and completed.
 * Afterwards the CQ doorbell is rung if anything was consumed, delayed
 * submissions are flushed, and timeouts are checked when enabled.
 *
 * For the admin queue the controller lock is held for the whole call and
 * pending admin requests are completed before returning.
 *
 * Returns the number of completion entries processed by this call.
 */
static int32_t
nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_tracker *tr;
struct spdk_nvme_cpl *cpl, *next_cpl;
uint32_t num_completions = 0;
struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
uint16_t next_cq_head;
uint8_t next_phase;
bool next_is_valid = false;
if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
}
if (max_completions == 0 || max_completions > pqpair->max_completions_cap) {
/*
* max_completions == 0 means unlimited, but complete at most
* max_completions_cap batch of I/O at a time so that the completion
* queue doorbells don't wrap around.
*/
max_completions = pqpair->max_completions_cap;
}
while (1) {
cpl = &pqpair->cpl[pqpair->cq_head];
/* Stop at the first entry with a stale phase bit, unless the previous
* iteration already saw this entry as valid while looking ahead.
*/
if (!next_is_valid && cpl->status.p != pqpair->flags.phase) {
break;
}
/* Work out the index and expected phase of the following entry; the
* expected phase flips when the index wraps to 0.
*/
if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) {
next_cq_head = pqpair->cq_head + 1;
next_phase = pqpair->flags.phase;
} else {
next_cq_head = 0;
next_phase = !pqpair->flags.phase;
}
next_cpl = &pqpair->cpl[next_cq_head];
next_is_valid = (next_cpl->status.p == next_phase);
/* If the following entry is already valid, prefetch its tracker. */
if (next_is_valid) {
__builtin_prefetch(&pqpair->tr[next_cpl->cid]);
}
#ifdef __PPC64__
/*
* This memory barrier prevents reordering of:
* - load after store from/to tr
* - load after load cpl phase and cpl cid
*/
spdk_mb();
#elif defined(__aarch64__)
__asm volatile("dmb oshld" ::: "memory");
#endif
/* Advance the head; wrapping to 0 flips the expected phase. */
if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) {
pqpair->cq_head = 0;
pqpair->flags.phase = !pqpair->flags.phase;
}
/* The command identifier of the completion indexes the tracker array. */
tr = &pqpair->tr[cpl->cid];
/* Prefetch the req's STAILQ_ENTRY since we'll need to access it
* as part of putting the req back on the qpair's free list.
*/
__builtin_prefetch(&tr->req->stailq);
/* Record the submission queue head reported by this completion. */
pqpair->sq_head = cpl->sqhd;
if (tr->req) {
nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true);
} else {
/* The tracker has no request attached: log it and assert. */
SPDK_ERRLOG("cpl does not map to outstanding cmd\n");
spdk_nvme_qpair_print_completion(qpair, cpl);
assert(0);
}
if (++num_completions == max_completions) {
break;
}
}
/* One doorbell write covers everything consumed in this call. */
if (num_completions > 0) {
nvme_pcie_qpair_ring_cq_doorbell(qpair);
}
/* With delayed submission, ring the SQ doorbell here if the tail has moved
* since it was last recorded in last_sq_tail.
*/
if (pqpair->flags.delay_cmd_submit) {
if (pqpair->last_sq_tail != pqpair->sq_tail) {
nvme_pcie_qpair_ring_sq_doorbell(qpair);
pqpair->last_sq_tail = pqpair->sq_tail;
}
}
if (spdk_unlikely(ctrlr->timeout_enabled)) {
/*
* User registered for timeout callback
*/
nvme_pcie_qpair_check_timeout(qpair);
}
/* Before returning, complete any pending admin request. */
if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
nvme_pcie_qpair_complete_pending_admin_request(qpair);
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}
return num_completions;
}
static struct spdk_pci_id nvme_pci_driver_id[] = {
{
.class_id = SPDK_PCI_CLASS_NVME,

View File

@ -40,6 +40,8 @@
#include "nvme_internal.h"
#include "nvme_pcie_internal.h"
__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL;
static uint64_t
nvme_pcie_vtophys(struct spdk_nvme_ctrlr *ctrlr, const void *buf)
{
@ -526,6 +528,474 @@ nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
{
}
/* Copy one NVMe command into a submission queue slot using only 8-byte stores.
 *
 * Used when dst points to MMIO (i.e. CMB) in a virtual machine: QEMU will not
 * emulate wide instructions that target MMIO space, so the command is moved
 * one 64-bit word at a time.
 */
static inline void
nvme_pcie_copy_command_mmio(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
{
	const uint64_t *from = (const uint64_t *)src;
	uint64_t *to = (uint64_t *)dst;
	uint32_t remaining = sizeof(*dst) / sizeof(uint64_t);

	while (remaining > 0) {
		*to++ = *from++;
		remaining--;
	}
}
/* Copy one NVMe command from src into the submission queue entry dst.
 *
 * dst and src are known to be non-overlapping and 64-byte aligned. With SSE2
 * the copy is done as four 16-byte aligned loads, each followed by a streaming
 * store; without SSE2 a plain structure assignment is used.
 */
static inline void
nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
{
#if !defined(__SSE2__)
	*dst = *src;
#else
	const __m128i *in = (const __m128i *)src;
	__m128i *out = (__m128i *)dst;
	uint32_t i;

	/* Four 16-byte pieces, 64 bytes in total. */
	for (i = 0; i < 4; i++) {
		_mm_stream_si128(&out[i], _mm_load_si128(&in[i]));
	}
#endif
}
/* Copy the command of tracker tr into the next submission queue slot and
 * advance the tail, wrapping at num_entries.
 *
 * The SQ doorbell is rung immediately unless the queue pair has
 * delay_cmd_submit set. For the first command of a fused pair,
 * first_fused_submitted is set so that the doorbell is held back.
 */
void
nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct nvme_request *req = tr->req;
	struct spdk_nvme_cmd *slot;
	bool narrow_copy;

	assert(req != NULL);

	if (req->cmd.fuse == SPDK_NVME_IO_FLAGS_FUSE_FIRST) {
		/* First cmd of two fused commands - the doorbell must not be rung yet. */
		qpair->first_fused_submitted = 1;
	}

	slot = &pq->cmd[pq->sq_tail];

	/* QEMU's virtual NVMe controller limits a single access to 8 bytes, so wide
	 * instructions cannot be used when the quirk is set and the SQ lives in the CMB.
	 */
	narrow_copy = (qpair->ctrlr->quirks & NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH) && pq->sq_in_cmb;
	if (spdk_unlikely(narrow_copy)) {
		nvme_pcie_copy_command_mmio(slot, &req->cmd);
	} else {
		nvme_pcie_copy_command(slot, &req->cmd);
	}

	pq->sq_tail++;
	if (spdk_unlikely(pq->sq_tail == pq->num_entries)) {
		pq->sq_tail = 0;
	}

	if (spdk_unlikely(pq->sq_tail == pq->sq_head)) {
		SPDK_ERRLOG("sq_tail is passing sq_head!\n");
	}

	if (pq->flags.delay_cmd_submit) {
		return;
	}

	nvme_pcie_qpair_ring_sq_doorbell(qpair);
}
/* Finish processing of tracker tr with completion cpl.
 *
 * A failed command that nvme_completion_is_retry() accepts and that is still
 * below the queue pair's retry_count is resubmitted. Otherwise the tracker is
 * taken off the outstanding list, the request is either completed and freed
 * here or - for an admin request whose pid differs from the caller's - queued
 * as a pending admin request, and the tracker is put at the head of the free
 * list.
 *
 * print_on_error: print the command and completion when cpl is an error and
 * error logging is not disabled in the controller options.
 */
void
nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
				 struct spdk_nvme_cpl *cpl, bool print_on_error)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct nvme_request *req = tr->req;
	bool failed, resubmit;

	assert(req != NULL);

	failed = spdk_nvme_cpl_is_error(cpl);
	resubmit = failed && nvme_completion_is_retry(cpl) &&
		   req->retries < pq->retry_count;

	if (failed && print_on_error && !qpair->ctrlr->opts.disable_error_logging) {
		spdk_nvme_qpair_print_command(qpair, &req->cmd);
		spdk_nvme_qpair_print_completion(qpair, cpl);
	}

	assert(cpl->cid == req->cmd.cid);

	if (resubmit) {
		req->retries++;
		nvme_pcie_qpair_submit_tracker(qpair, tr);
		return;
	}

	TAILQ_REMOVE(&pq->outstanding_tr, tr, tq_list);

	/* Only admin requests are checked for coming from a different process. */
	if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) {
		nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl);
	} else {
		nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl);
		nvme_qpair_free_request(qpair, req);
	}

	tr->req = NULL;
	TAILQ_INSERT_HEAD(&pq->free_tr, tr, tq_list);
}
void
nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
bool print_on_error)
{
struct spdk_nvme_cpl cpl;
memset(&cpl, 0, sizeof(cpl));
cpl.sqid = qpair->id;
cpl.cid = tr->cid;
cpl.status.sct = sct;
cpl.status.sc = sc;
cpl.status.dnr = dnr;
nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}
void
nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_tracker *tr, *temp, *last;
last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head);
/* Abort previously submitted (outstanding) trs */
TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) {
if (!qpair->ctrlr->opts.disable_error_logging) {
SPDK_ERRLOG("aborting outstanding command\n");
}
nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true);
if (tr == last) {
break;
}
}
}
/* Abort every outstanding Asynchronous Event Request on the queue pair with
 * status GENERIC / ABORTED_SQ_DELETION, without error printing.
 *
 * After each abort the scan restarts from the head of the outstanding list.
 */
void
nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct nvme_tracker *cur = TAILQ_FIRST(&pq->outstanding_tr);

	while (cur != NULL) {
		assert(cur->req != NULL);

		if (cur->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			cur = TAILQ_NEXT(cur, tq_list);
			continue;
		}

		nvme_pcie_qpair_manual_complete_tracker(qpair, cur,
							SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0,
							false);
		cur = TAILQ_FIRST(&pq->outstanding_tr);
	}
}
/* Admin-queue specific part of queue pair destruction: abort the outstanding
 * Asynchronous Event Requests.
 */
void
nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	nvme_pcie_admin_qpair_abort_aers(qpair);
}
/* Abort the requests of a queue pair by aborting its outstanding trackers
 * with the given dnr value.
 */
void
nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	nvme_pcie_qpair_abort_trackers(qpair, dnr);
}
/* Run the timeout check over the outstanding trackers of a queue pair.
 *
 * Nothing is done unless the controller is in the READY state and the relevant
 * process (the current process for the admin queue, the queue pair's
 * active_proc otherwise) has a timeout callback registered.
 */
static void
nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_ctrlr_process *proc;
	struct nvme_tracker *cur, *following;
	uint64_t now;

	/* Timeouts are not checked during controller initialization. */
	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	proc = nvme_qpair_is_admin_queue(qpair) ?
	       nvme_ctrlr_get_current_process(ctrlr) : qpair->active_proc;

	/* Without a process or a timeout callback there is nothing to report to. */
	if (proc == NULL || proc->timeout_cb_fn == NULL) {
		return;
	}

	now = spdk_get_ticks();
	TAILQ_FOREACH_SAFE(cur, &pq->outstanding_tr, tq_list, following) {
		assert(cur->req != NULL);

		/*
		 * The requests are in order, so the scan ends at the first one
		 * that has not timed out.
		 */
		if (nvme_request_check_timeout(cur->req, cur->cid, proc, now)) {
			break;
		}
	}
}
/*
 * Process completion queue entries for a queue pair.
 *
 * Entries are consumed starting at cq_head for as long as their phase bit
 * matches the queue pair's current phase, up to max_completions entries
 * (0, or anything above max_completions_cap, is clamped to max_completions_cap).
 * Each entry's tracker is looked up by command identifier and completed.
 * Afterwards the CQ doorbell is rung if anything was consumed, delayed
 * submissions are flushed, and timeouts are checked when enabled.
 *
 * For the admin queue the controller lock is held for the whole call and
 * pending admin requests are completed before returning.
 *
 * Returns the number of completion entries processed by this call.
 */
int32_t
nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_tracker *tr;
struct spdk_nvme_cpl *cpl, *next_cpl;
uint32_t num_completions = 0;
struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
uint16_t next_cq_head;
uint8_t next_phase;
bool next_is_valid = false;
if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
}
if (max_completions == 0 || max_completions > pqpair->max_completions_cap) {
/*
* max_completions == 0 means unlimited, but complete at most
* max_completions_cap batch of I/O at a time so that the completion
* queue doorbells don't wrap around.
*/
max_completions = pqpair->max_completions_cap;
}
while (1) {
cpl = &pqpair->cpl[pqpair->cq_head];
/* Stop at the first entry with a stale phase bit, unless the previous
* iteration already saw this entry as valid while looking ahead.
*/
if (!next_is_valid && cpl->status.p != pqpair->flags.phase) {
break;
}
/* Work out the index and expected phase of the following entry; the
* expected phase flips when the index wraps to 0.
*/
if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) {
next_cq_head = pqpair->cq_head + 1;
next_phase = pqpair->flags.phase;
} else {
next_cq_head = 0;
next_phase = !pqpair->flags.phase;
}
next_cpl = &pqpair->cpl[next_cq_head];
next_is_valid = (next_cpl->status.p == next_phase);
/* If the following entry is already valid, prefetch its tracker. */
if (next_is_valid) {
__builtin_prefetch(&pqpair->tr[next_cpl->cid]);
}
#ifdef __PPC64__
/*
* This memory barrier prevents reordering of:
* - load after store from/to tr
* - load after load cpl phase and cpl cid
*/
spdk_mb();
#elif defined(__aarch64__)
__asm volatile("dmb oshld" ::: "memory");
#endif
/* Advance the head; wrapping to 0 flips the expected phase. */
if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) {
pqpair->cq_head = 0;
pqpair->flags.phase = !pqpair->flags.phase;
}
/* The command identifier of the completion indexes the tracker array. */
tr = &pqpair->tr[cpl->cid];
/* Prefetch the req's STAILQ_ENTRY since we'll need to access it
* as part of putting the req back on the qpair's free list.
*/
__builtin_prefetch(&tr->req->stailq);
/* Record the submission queue head reported by this completion. */
pqpair->sq_head = cpl->sqhd;
if (tr->req) {
nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true);
} else {
/* The tracker has no request attached: log it and assert. */
SPDK_ERRLOG("cpl does not map to outstanding cmd\n");
spdk_nvme_qpair_print_completion(qpair, cpl);
assert(0);
}
if (++num_completions == max_completions) {
break;
}
}
/* One doorbell write covers everything consumed in this call. */
if (num_completions > 0) {
nvme_pcie_qpair_ring_cq_doorbell(qpair);
}
/* With delayed submission, ring the SQ doorbell here if the tail has moved
* since it was last recorded in last_sq_tail.
*/
if (pqpair->flags.delay_cmd_submit) {
if (pqpair->last_sq_tail != pqpair->sq_tail) {
nvme_pcie_qpair_ring_sq_doorbell(qpair);
pqpair->last_sq_tail = pqpair->sq_tail;
}
}
if (spdk_unlikely(ctrlr->timeout_enabled)) {
/*
* User registered for timeout callback
*/
nvme_pcie_qpair_check_timeout(qpair);
}
/* Before returning, complete any pending admin request. */
if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
nvme_pcie_qpair_complete_pending_admin_request(qpair);
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}
return num_completions;
}
/* Release the host-side resources of a queue pair and free the queue pair.
 *
 * For the admin queue the outstanding AERs are aborted first. Always returns 0.
 */
int
nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	bool sq_freeable, cq_freeable;

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_pcie_admin_qpair_destroy(qpair);
	}

	/*
	 * sq_vaddr and cq_vaddr tell whether the user supplied the memory
	 * buffers when creating the I/O queue. User-supplied memory is not
	 * freed here, and neither is a submission queue placed in the CMB.
	 */
	sq_freeable = !pq->sq_vaddr && !pq->sq_in_cmb;
	cq_freeable = !pq->cq_vaddr;

	if (sq_freeable && pq->cmd != NULL) {
		spdk_free(pq->cmd);
	}
	if (cq_freeable && pq->cpl != NULL) {
		spdk_free(pq->cpl);
	}
	if (pq->tr != NULL) {
		spdk_free(pq->tr);
	}

	nvme_qpair_deinit(qpair);
	spdk_free(pq);

	return 0;
}
/* Allocate and set up an I/O queue pair with id qid on ctrlr.
 *
 * Queue size and delayed-submission mode are taken from opts. Returns the new
 * queue pair, or NULL if the allocation, nvme_qpair_init() or
 * nvme_pcie_qpair_construct() fails; after a failed init/construct the
 * partially built queue pair is destroyed.
 */
struct spdk_nvme_qpair *
nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
				const struct spdk_nvme_io_qpair_opts *opts)
{
	struct nvme_pcie_qpair *pq;
	struct spdk_nvme_qpair *qpair;
	int rc;

	assert(ctrlr != NULL);

	pq = spdk_zmalloc(sizeof(*pq), 64, NULL,
			  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pq == NULL) {
		return NULL;
	}

	pq->num_entries = opts->io_queue_size;
	pq->flags.delay_cmd_submit = opts->delay_cmd_submit;
	qpair = &pq->qpair;

	rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests);
	if (rc == 0) {
		rc = nvme_pcie_qpair_construct(qpair, opts);
	}
	if (rc != 0) {
		nvme_pcie_qpair_destroy(qpair);
		return NULL;
	}

	return qpair;
}
/* Delete an I/O queue pair on the controller and release its host resources.
 *
 * Unless the controller is marked removed, the submission queue and then the
 * completion queue are deleted with admin commands, each waited for
 * synchronously; completions for the queue pair are processed in between.
 * Afterwards the remaining trackers are aborted (unless
 * no_deletion_notification_needed is set) and the queue pair is destroyed.
 *
 * Returns 0 on success, -ENOMEM if the status tracker cannot be allocated, the
 * submit error if a delete command cannot be sent, or -1 if waiting for a
 * delete command fails. On those error returns the queue pair is not destroyed.
 */
int
nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_completion_poll_status *poll_status;
	int rc;

	assert(ctrlr != NULL);

	if (ctrlr->is_removed) {
		goto teardown;
	}

	poll_status = calloc(1, sizeof(*poll_status));
	if (poll_status == NULL) {
		SPDK_ERRLOG("Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	/* Step 1: delete the I/O submission queue. */
	rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, poll_status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc);
		free(poll_status);
		return rc;
	}
	if (nvme_wait_for_completion(ctrlr->adminq, poll_status) != 0) {
		/* NOTE(review): a timed-out status is not freed here; presumably it is
		 * released elsewhere once the command finally completes - confirm.
		 */
		if (!poll_status->timed_out) {
			free(poll_status);
		}
		return -1;
	}

	/* With the submission queue deleted, the device is supposed to have
	 * completed any outstanding I/O. Try to reap them; whatever does not
	 * complete is aborted further down.
	 */
	nvme_pcie_qpair_process_completions(qpair, 0);

	/* Step 2: delete the completion queue, reusing the status tracker. */
	memset(poll_status, 0, sizeof(*poll_status));
	rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, poll_status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
		free(poll_status);
		return rc;
	}
	if (nvme_wait_for_completion(ctrlr->adminq, poll_status) != 0) {
		if (!poll_status->timed_out) {
			free(poll_status);
		}
		return -1;
	}
	free(poll_status);

teardown:
	if (qpair->no_deletion_notification_needed == 0) {
		/* Abort the rest of the I/O */
		nvme_pcie_qpair_abort_trackers(qpair, 1);
	}

	nvme_pcie_qpair_destroy(qpair);
	return 0;
}
struct spdk_nvme_transport_poll_group *
nvme_pcie_poll_group_create(void)
{

View File

@ -87,6 +87,8 @@ struct nvme_pcie_ctrlr {
volatile uint32_t *doorbell_base;
};
extern __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr;
struct nvme_tracker {
TAILQ_ENTRY(nvme_tracker) tq_list;
@ -203,6 +205,88 @@ nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);
}
/* Decide whether a doorbell index update has to be signalled.
 *
 * Both distances are computed modulo 2^16. Returns non-zero when the distance
 * from event_idx to new_idx is no greater than the distance from old to
 * new_idx, and 0 otherwise.
 */
static inline int
nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	const uint16_t since_event = (uint16_t)(new_idx - event_idx);
	const uint16_t since_old = (uint16_t)(new_idx - old);

	return since_event <= since_old;
}
/* Store a new doorbell value in the shadow doorbell and report whether the
 * real MMIO doorbell still has to be written.
 *
 * Returns true when shadow_db is NULL or when nvme_pcie_qpair_need_event()
 * reports that the update must be signalled; returns false otherwise.
 * The qpair argument is not used by this function.
 */
static inline bool
nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value,
				     volatile uint32_t *shadow_db,
				     volatile uint32_t *eventidx)
{
	uint16_t prev;

	if (shadow_db == NULL) {
		return true;
	}

	prev = *shadow_db;
	*shadow_db = value;

	/* The shadow doorbell store has to be done before EventIdx is read from memory. */
	spdk_mb();

	return nvme_pcie_qpair_need_event(*eventidx, value, prev) != 0;
}
/* Publish the current submission queue tail to the controller.
 *
 * Nothing is written when the previous submission was the first command of a
 * fused pair (the flag is cleared instead), or when a shadow doorbell is in use
 * and nvme_pcie_qpair_update_mmio_required() says no MMIO write is needed.
 */
static inline void
nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	bool write_doorbell = true;

	if (qpair->first_fused_submitted) {
		/* First cmd of two fused commands - hold the doorbell back. */
		qpair->first_fused_submitted = 0;
		return;
	}

	if (spdk_unlikely(pq->flags.has_shadow_doorbell)) {
		write_doorbell = nvme_pcie_qpair_update_mmio_required(qpair,
				 pq->sq_tail,
				 pq->shadow_doorbell.sq_tdbl,
				 pq->shadow_doorbell.sq_eventidx);
	}

	if (spdk_unlikely(!write_doorbell)) {
		return;
	}

	spdk_wmb();
	/* The thread-local controller pointer is set only for the duration of the MMIO write. */
	g_thread_mmio_ctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	spdk_mmio_write_4(pq->sq_tdbl, pq->sq_tail);
	g_thread_mmio_ctrlr = NULL;
}
/* Publish the current completion queue head to the controller.
 *
 * When a shadow doorbell is in use, the MMIO write is skipped if
 * nvme_pcie_qpair_update_mmio_required() says it is not needed.
 */
static inline void
nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	bool write_doorbell = true;

	if (spdk_unlikely(pq->flags.has_shadow_doorbell)) {
		write_doorbell = nvme_pcie_qpair_update_mmio_required(qpair,
				 pq->cq_head,
				 pq->shadow_doorbell.cq_hdbl,
				 pq->shadow_doorbell.cq_eventidx);
	}

	if (spdk_unlikely(!write_doorbell)) {
		return;
	}

	/* The thread-local controller pointer is set only for the duration of the MMIO write. */
	g_thread_mmio_ctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	spdk_mmio_write_4(pq->cq_hdbl, pq->cq_head);
	g_thread_mmio_ctrlr = NULL;
}
int nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair);
int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
const struct spdk_nvme_io_qpair_opts *opts);
@ -221,6 +305,22 @@ int nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_
spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
void nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
void nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr);
void nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
bool print_on_error);
void nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
struct spdk_nvme_cpl *cpl, bool print_on_error);
void nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr);
void nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair);
void nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair);
void nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair,
uint32_t max_completions);
int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair);
struct spdk_nvme_qpair *nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
const struct spdk_nvme_io_qpair_opts *opts);
int nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
struct spdk_nvme_transport_poll_group *nvme_pcie_poll_group_create(void);
int nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair);

View File

@ -354,16 +354,13 @@ nvme_vfio_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
return 0;
}
static int
nvme_vfio_qpair_destroy(struct spdk_nvme_qpair *qpair);
static int
nvme_vfio_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
struct nvme_vfio_ctrlr *vctrlr = nvme_vfio_ctrlr(ctrlr);
if (ctrlr->adminq) {
nvme_vfio_qpair_destroy(ctrlr->adminq);
nvme_pcie_qpair_destroy(ctrlr->adminq);
}
nvme_ctrlr_destruct_finish(ctrlr);
@ -389,294 +386,6 @@ nvme_vfio_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
return NVME_MAX_SGES;
}
/*
 * Allocate and set up an I/O queue pair for a vfio-user controller.
 *
 * ctrlr - owning controller; must not be NULL.
 * qid   - queue identifier handed to nvme_qpair_init().
 * opts  - supplies io_queue_size, delay_cmd_submit, qprio and
 *         io_queue_requests; also forwarded to nvme_pcie_qpair_construct().
 *
 * Returns the generic qpair embedded in the new nvme_pcie_qpair on success,
 * or NULL when the allocation or either initialization step fails.
 */
static struct spdk_nvme_qpair *
nvme_vfio_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
				const struct spdk_nvme_io_qpair_opts *opts)
{
	struct nvme_pcie_qpair *new_qp;
	struct spdk_nvme_qpair *generic_qp;

	assert(ctrlr != NULL);

	/* Zeroed allocation: every member starts out as 0/NULL. */
	new_qp = spdk_zmalloc(sizeof(*new_qp), 64, NULL,
			      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (!new_qp) {
		return NULL;
	}

	new_qp->num_entries = opts->io_queue_size;
	new_qp->flags.delay_cmd_submit = opts->delay_cmd_submit;
	generic_qp = &new_qp->qpair;

	/*
	 * Generic initialization runs first; the PCIe-level construct step is
	 * only attempted when it succeeded. Either failure tears the qpair down.
	 */
	if (nvme_qpair_init(generic_qp, qid, ctrlr, opts->qprio, opts->io_queue_requests) != 0 ||
	    nvme_pcie_qpair_construct(generic_qp, opts) != 0) {
		nvme_vfio_qpair_destroy(generic_qp);
		return NULL;
	}

	return generic_qp;
}
/*
 * Forward declaration: nvme_vfio_ctrlr_delete_io_qpair() calls this before
 * the definition, which appears later in this file.
 */
static void
nvme_vfio_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr);
/*
 * Delete an I/O queue pair.
 *
 * Unless the controller is marked as removed, a "delete I/O submission queue"
 * command followed by a "delete I/O completion queue" command is issued and
 * each one is waited for on the admin queue. Afterwards any remaining
 * trackers are aborted (unless the qpair asked for no deletion notification)
 * and the qpair is destroyed.
 *
 * Returns 0 on success, -ENOMEM if the status tracker cannot be allocated,
 * the error code of a delete command that could not be sent, or -1 when
 * waiting for a completion fails. On every non-zero return the qpair is left
 * untouched.
 */
static int
nvme_vfio_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_completion_poll_status *poll_status;
	int ret;

	assert(ctrlr != NULL);

	if (ctrlr->is_removed) {
		/* Skip the admin commands and go straight to local cleanup. */
		goto teardown;
	}

	poll_status = calloc(1, sizeof(*poll_status));
	if (poll_status == NULL) {
		SPDK_ERRLOG("Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	/* Step 1: delete the I/O submission queue. */
	ret = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, poll_status);
	if (ret != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", ret);
		free(poll_status);
		return ret;
	}

	if (nvme_wait_for_completion(ctrlr->adminq, poll_status) != 0) {
		/*
		 * The tracker is deliberately not freed when timed_out is set
		 * (presumably it is still referenced by the pending completion
		 * callback - confirm against nvme_wait_for_completion()).
		 */
		if (!poll_status->timed_out) {
			free(poll_status);
		}
		return -1;
	}

	/* Reuse the same tracker for the second command. */
	memset(poll_status, 0, sizeof(*poll_status));

	/* Step 2: delete the completion queue. */
	ret = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, poll_status);
	if (ret != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", ret);
		free(poll_status);
		return ret;
	}

	if (nvme_wait_for_completion(ctrlr->adminq, poll_status) != 0) {
		if (!poll_status->timed_out) {
			free(poll_status);
		}
		return -1;
	}

	free(poll_status);

teardown:
	if (qpair->no_deletion_notification_needed == 0) {
		/* Abort whatever I/O is still outstanding, with "do not retry" set. */
		nvme_vfio_qpair_abort_trackers(qpair, 1);
	}

	nvme_vfio_qpair_destroy(qpair);
	return 0;
}
/*
 * Publish the current submission queue tail by writing it to the SQ tail
 * doorbell.
 *
 * If the first command of a fused pair was just submitted, the doorbell is
 * not written; the first_fused_submitted marker is cleared instead so that
 * the next call does ring it.
 */
static inline void
nvme_vfio_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *vfio_qp = nvme_pcie_qpair(qpair);

	if (!qpair->first_fused_submitted) {
		/* Write barrier first, then the doorbell write. */
		spdk_wmb();
		spdk_mmio_write_4(vfio_qp->sq_tdbl, vfio_qp->sq_tail);
		return;
	}

	/* This is first cmd of two fused commands - don't ring doorbell */
	qpair->first_fused_submitted = 0;
}
/*
 * Write the current completion queue head index to the CQ head doorbell.
 */
static inline void
nvme_vfio_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *vfio_qp = nvme_pcie_qpair(qpair);

	spdk_mmio_write_4(vfio_qp->cq_hdbl, vfio_qp->cq_head);
}
/*
 * Place the command of tr's request into the submission ring and ring the
 * SQ doorbell.
 *
 * tr->req must not be NULL. The command is copied into the slot at sq_tail,
 * sq_tail is advanced with wrap-around at num_entries, and an error is
 * logged (nothing more) if the tail has caught up with the head.
 */
static void
nvme_vfio_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_pcie_qpair *vfio_qp = nvme_pcie_qpair(qpair);
	struct nvme_request *request = tr->req;

	assert(request != NULL);

	if (request->cmd.fuse == SPDK_NVME_IO_FLAGS_FUSE_FIRST) {
		/* First half of a fused pair: the doorbell helper will skip this ring. */
		qpair->first_fused_submitted = 1;
	}

	vfio_qp->cmd[vfio_qp->sq_tail] = request->cmd;

	/* Advance the tail, wrapping back to slot 0 at the end of the ring. */
	vfio_qp->sq_tail++;
	if (spdk_unlikely(vfio_qp->sq_tail == vfio_qp->num_entries)) {
		vfio_qp->sq_tail = 0;
	}

	if (spdk_unlikely(vfio_qp->sq_tail == vfio_qp->sq_head)) {
		SPDK_ERRLOG("sq_tail is passing sq_head!\n");
	}

	nvme_vfio_qpair_ring_sq_doorbell(qpair);
}
/*
 * Finish (or retry) the request attached to a tracker.
 *
 * qpair          - queue pair the tracker belongs to.
 * tr             - tracker being completed; tr->req must not be NULL.
 * cpl            - completion to report; its cid must match the request's.
 * print_on_error - when true, an error completion is printed together with
 *                  its command, unless the controller disabled error logging.
 *
 * An error completion that is retryable, with retries left, resubmits the
 * tracker. Otherwise the request is completed and the tracker is moved from
 * the outstanding list to the head of the free list.
 */
static void
nvme_vfio_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
				 struct spdk_nvme_cpl *cpl, bool print_on_error)
{
	struct nvme_pcie_qpair *vfio_qp = nvme_pcie_qpair(qpair);
	struct nvme_request *request = tr->req;
	bool is_error;
	bool should_retry;

	assert(request != NULL);

	is_error = spdk_nvme_cpl_is_error(cpl);
	should_retry = is_error && nvme_completion_is_retry(cpl) &&
		       request->retries < vfio_qp->retry_count;

	if (is_error && print_on_error && !qpair->ctrlr->opts.disable_error_logging) {
		spdk_nvme_qpair_print_command(qpair, &request->cmd);
		spdk_nvme_qpair_print_completion(qpair, cpl);
	}

	assert(cpl->cid == request->cmd.cid);

	if (should_retry) {
		request->retries++;
		nvme_vfio_qpair_submit_tracker(qpair, tr);
		return;
	}

	if (nvme_qpair_is_admin_queue(qpair) && request->pid != getpid()) {
		/*
		 * Admin request issued by another process: queue it as pending
		 * and do not free it here.
		 */
		nvme_pcie_qpair_insert_pending_admin_request(qpair, request, cpl);
	} else {
		nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, request, cpl);
		nvme_qpair_free_request(qpair, request);
	}

	/* Recycle the tracker. */
	tr->req = NULL;
	TAILQ_REMOVE(&vfio_qp->outstanding_tr, tr, tq_list);
	TAILQ_INSERT_HEAD(&vfio_qp->free_tr, tr, tq_list);
}
static void
nvme_vfio_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
bool print_on_error)
{
struct spdk_nvme_cpl cpl;
memset(&cpl, 0, sizeof(cpl));
cpl.sqid = qpair->id;
cpl.cid = tr->cid;
cpl.status.sct = sct;
cpl.status.sc = sc;
cpl.status.dnr = dnr;
nvme_vfio_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}
/*
 * Abort every tracker that is outstanding at the time of the call.
 *
 * Each one is completed with generic status "aborted by request" and the
 * given do-not-retry value. The tail of the outstanding list is recorded
 * before the walk and the walk stops after that tracker, so trackers added
 * to the list during the walk are not visited.
 */
static void
nvme_vfio_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	struct nvme_pcie_qpair *vfio_qp = nvme_pcie_qpair(qpair);
	struct nvme_tracker *cur, *next, *stop_at;

	/* Snapshot of the current tail; it bounds the walk below. */
	stop_at = TAILQ_LAST(&vfio_qp->outstanding_tr, nvme_outstanding_tr_head);

	TAILQ_FOREACH_SAFE(cur, &vfio_qp->outstanding_tr, tq_list, next) {
		if (!qpair->ctrlr->opts.disable_error_logging) {
			SPDK_ERRLOG("aborting outstanding command\n");
		}

		nvme_vfio_qpair_manual_complete_tracker(qpair, cur, SPDK_NVME_SCT_GENERIC,
							SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true);

		if (cur == stop_at) {
			break;
		}
	}
}
/*
 * Abort the requests of a queue pair. This simply forwards to
 * nvme_vfio_qpair_abort_trackers() with the same do-not-retry value.
 */
static void
nvme_vfio_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	nvme_vfio_qpair_abort_trackers(qpair, dnr);
}
/*
 * Abort every outstanding Asynchronous Event Request on the queue pair.
 *
 * Each AER tracker is completed with generic status "aborted - SQ deletion",
 * do-not-retry cleared and error printing disabled. Other trackers are left
 * alone.
 */
static void
nvme_vfio_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *vfio_qp = nvme_pcie_qpair(qpair);
	struct nvme_tracker *cur = TAILQ_FIRST(&vfio_qp->outstanding_tr);

	while (cur != NULL) {
		assert(cur->req != NULL);

		if (cur->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			/* Not an AER: move on to the next tracker. */
			cur = TAILQ_NEXT(cur, tq_list);
			continue;
		}

		nvme_vfio_qpair_manual_complete_tracker(qpair, cur,
							SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0,
							false);

		/* Completing a tracker modifies the list, so restart from its head. */
		cur = TAILQ_FIRST(&vfio_qp->outstanding_tr);
	}
}
/*
 * Admin-queue specific part of queue pair destruction: abort the
 * outstanding Asynchronous Event Requests.
 */
static void
nvme_vfio_admin_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	nvme_vfio_admin_qpair_abort_aers(qpair);
}
/*
 * Release every resource owned by a queue pair and free the queue pair
 * itself.
 *
 * For the admin queue, outstanding Asynchronous Event Requests are aborted
 * first. Then the command ring, the completion ring and the tracker array
 * are freed, the generic qpair state is torn down with nvme_qpair_deinit(),
 * and finally the containing nvme_pcie_qpair is freed. qpair must not be
 * used after this returns.
 *
 * Always returns 0.
 */
static int
nvme_vfio_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair);

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_vfio_admin_qpair_destroy(qpair);
	}

	/*
	 * Any of these may still be NULL when queue pair creation failed
	 * part-way. spdk_free() accepts NULL, so all three are released the
	 * same way without a guard.
	 */
	spdk_free(vqpair->cmd);
	spdk_free(vqpair->cpl);
	spdk_free(vqpair->tr);

	nvme_qpair_deinit(qpair);

	/* qpair is embedded in vqpair, so this releases it as well. */
	spdk_free(vqpair);

	return 0;
}
static inline int
nvme_vfio_prp_list_append(struct nvme_tracker *tr, uint32_t *prp_index, void *virt_addr, size_t len,
uint32_t page_size)
@ -755,7 +464,7 @@ nvme_vfio_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_
rc = nvme_vfio_prp_list_append(tr, &prp_index, req->payload.contig_or_cb_arg + req->payload_offset,
req->payload_size, qpair->ctrlr->page_size);
if (rc) {
nvme_vfio_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
SPDK_NVME_SC_INVALID_FIELD,
1 /* do not retry */, true);
}
@ -797,7 +506,7 @@ nvme_vfio_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_reques
}
}
nvme_vfio_qpair_submit_tracker(qpair, tr);
nvme_pcie_qpair_submit_tracker(qpair, tr);
exit:
if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
@ -807,97 +516,6 @@ exit:
return rc;
}
/*
 * Reap completions from the queue pair's completion ring.
 *
 * qpair           - queue pair to poll.
 * max_completions - upper bound on completions handled by this call; 0 or a
 *                   value above max_completions_cap is replaced by
 *                   max_completions_cap.
 *
 * For the admin queue the controller lock is held for the whole call and
 * pending admin requests are completed before it is released.
 *
 * Returns the number of completion entries consumed by this call.
 */
static int32_t
nvme_vfio_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
struct nvme_pcie_qpair *vqpair = nvme_pcie_qpair(qpair);
struct nvme_tracker *tr;
struct spdk_nvme_cpl *cpl, *next_cpl;
uint32_t num_completions = 0;
struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
uint16_t next_cq_head;
uint8_t next_phase;
/* Set when the previous iteration already saw that the next entry is valid. */
bool next_is_valid = false;
if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
}
if (max_completions == 0 || max_completions > vqpair->max_completions_cap) {
/*
 * max_completions == 0 means unlimited, but complete at most
 * max_completions_cap batch of I/O at a time so that the completion
 * queue doorbells don't wrap around.
 */
max_completions = vqpair->max_completions_cap;
}
while (1) {
cpl = &vqpair->cpl[vqpair->cq_head];
/*
 * An entry is new when its phase bit matches the expected phase. The
 * check is skipped if the previous iteration already validated this entry.
 */
if (!next_is_valid && cpl->status.p != vqpair->flags.phase) {
break;
}
/* Work out where the following entry lives and which phase it must carry. */
if (spdk_likely(vqpair->cq_head + 1 != vqpair->num_entries)) {
next_cq_head = vqpair->cq_head + 1;
next_phase = vqpair->flags.phase;
} else {
/* Wrapping to slot 0 inverts the expected phase. */
next_cq_head = 0;
next_phase = !vqpair->flags.phase;
}
next_cpl = &vqpair->cpl[next_cq_head];
next_is_valid = (next_cpl->status.p == next_phase);
if (next_is_valid) {
/* Warm the cache with the tracker the next iteration will use. */
__builtin_prefetch(&vqpair->tr[next_cpl->cid]);
}
/* Consume the current entry: advance the head, flipping the phase on wrap. */
if (spdk_unlikely(++vqpair->cq_head == vqpair->num_entries)) {
vqpair->cq_head = 0;
vqpair->flags.phase = !vqpair->flags.phase;
}
/* The completion's command id indexes the tracker array. */
tr = &vqpair->tr[cpl->cid];
/* Prefetch the req's STAILQ_ENTRY since we'll need to access it
 * as part of putting the req back on the qpair's free list.
 */
/* NOTE: tr->req is only checked for NULL below; here just an address is formed. */
__builtin_prefetch(&tr->req->stailq);
/* Record the submission queue head reported in the completion. */
vqpair->sq_head = cpl->sqhd;
if (tr->req) {
nvme_vfio_qpair_complete_tracker(qpair, tr, cpl, true);
} else {
/* No request is attached to the tracker this completion refers to. */
SPDK_ERRLOG("cpl does not map to outstanding cmd\n");
spdk_nvme_qpair_print_completion(qpair, cpl);
assert(0);
}
if (++num_completions == max_completions) {
break;
}
}
/* One CQ head doorbell write covers every entry consumed above. */
if (num_completions > 0) {
nvme_vfio_qpair_ring_cq_doorbell(qpair);
}
/*
 * With delayed command submission enabled, ring the SQ doorbell here if
 * the tail has moved since the last time this function recorded it.
 */
if (vqpair->flags.delay_cmd_submit) {
if (vqpair->last_sq_tail != vqpair->sq_tail) {
nvme_vfio_qpair_ring_sq_doorbell(qpair);
vqpair->last_sq_tail = vqpair->sq_tail;
}
}
/* Before returning, complete any pending admin request. */
if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
nvme_pcie_qpair_complete_pending_admin_request(qpair);
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}
return num_completions;
}
const struct spdk_nvme_transport_ops vfio_ops = {
.name = "VFIOUSER",
.type = SPDK_NVME_TRANSPORT_VFIOUSER,
@ -914,16 +532,16 @@ const struct spdk_nvme_transport_ops vfio_ops = {
.ctrlr_get_max_xfer_size = nvme_vfio_ctrlr_get_max_xfer_size,
.ctrlr_get_max_sges = nvme_vfio_ctrlr_get_max_sges,
.ctrlr_create_io_qpair = nvme_vfio_ctrlr_create_io_qpair,
.ctrlr_delete_io_qpair = nvme_vfio_ctrlr_delete_io_qpair,
.ctrlr_create_io_qpair = nvme_pcie_ctrlr_create_io_qpair,
.ctrlr_delete_io_qpair = nvme_pcie_ctrlr_delete_io_qpair,
.ctrlr_connect_qpair = nvme_pcie_ctrlr_connect_qpair,
.ctrlr_disconnect_qpair = nvme_pcie_ctrlr_disconnect_qpair,
.admin_qpair_abort_aers = nvme_vfio_admin_qpair_abort_aers,
.admin_qpair_abort_aers = nvme_pcie_admin_qpair_abort_aers,
.qpair_reset = nvme_pcie_qpair_reset,
.qpair_abort_reqs = nvme_vfio_qpair_abort_reqs,
.qpair_abort_reqs = nvme_pcie_qpair_abort_reqs,
.qpair_submit_request = nvme_vfio_qpair_submit_request,
.qpair_process_completions = nvme_vfio_qpair_process_completions,
.qpair_process_completions = nvme_pcie_qpair_process_completions,
.poll_group_create = nvme_pcie_poll_group_create,
.poll_group_connect_qpair = nvme_pcie_poll_group_connect_qpair,