From cf0eac8c66cc591a67bc2905930da1dbdf628daf Mon Sep 17 00:00:00 2001 From: Ben Walker Date: Wed, 6 Mar 2019 13:23:56 -0700 Subject: [PATCH] nvme: Add qpair option to batch command submissions Avoid ringing the submission queue doorbell until the call to spdk_nvme_qpair_process_completions(). Change-Id: I7b3cd952e5ec79109eaa1c3a50f6537d7aaea51a Signed-off-by: Ben Walker Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/447239 Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Changpeng Liu --- CHANGELOG.md | 8 ++++++++ include/spdk/nvme.h | 11 +++++++++++ lib/nvme/nvme_ctrlr.c | 4 ++++ lib/nvme/nvme_pcie.c | 18 ++++++++++++++++-- 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 445494bdf..bf0463ef9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,14 @@ return a context associated with the specified controllers. Users then call spdk_nvme_probe_poll_async() until it returns 0, indicating that the operation is completed with success. +A new qpair creation option, delay_pcie_doorbell, was added. This can be passed +to spdk_nvme_ctrlr_alloc_io_qpair(). This makes the I/O submission functions, +such as spdk_nvme_ns_cmd_writev(), skip ringing the submission queue doorbell. +Instead, the doorbell will be rung as necessary inside +spdk_nvme_qpair_process_completions(). This can result in significantly fewer +MMIO writes to the doorbell register under heavy load, greatly improving +performance. + New API spdk_nvme_ctrlr_get_flags() was added. ### raid diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index 6d289d123..46600277f 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -935,6 +935,17 @@ struct spdk_nvme_io_qpair_opts { * compatibility requirements, or driver-assisted striping. */ uint32_t io_queue_requests; + + /** + * When submitting I/O via spdk_nvme_ns_cmd_read/write and similar functions, + * don't immediately write the submission queue doorbell. 
Instead, write + * to the doorbell as necessary inside spdk_nvme_qpair_process_completions(). + * + * This results in better batching of I/O submission and consequently fewer + * MMIO writes to the doorbell, which may increase performance. + * + * This only applies to local PCIe devices. */ + bool delay_pcie_doorbell; }; /** diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index f289766cc..fcc95fe20 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -234,6 +234,10 @@ spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, opts->io_queue_requests = ctrlr->opts.io_queue_requests; } + if (FIELD_OK(delay_pcie_doorbell)) { + opts->delay_pcie_doorbell = false; + } + #undef FIELD_OK } diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c index 26d46c509..ba67d0418 100644 --- a/lib/nvme/nvme_pcie.c +++ b/lib/nvme/nvme_pcie.c @@ -171,6 +171,7 @@ struct nvme_pcie_qpair { uint16_t max_completions_cap; + uint16_t last_sq_tail; uint16_t sq_tail; uint16_t cq_head; uint16_t sq_head; @@ -179,6 +180,8 @@ struct nvme_pcie_qpair { bool is_enabled; + bool delay_pcie_doorbell; + /* * Base qpair structure. 
* This is located after the hot data in this structure so that the important parts of @@ -671,6 +674,7 @@ nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr) } pqpair->num_entries = NVME_ADMIN_ENTRIES; + pqpair->delay_pcie_doorbell = false; ctrlr->adminq = &pqpair->qpair; @@ -936,7 +940,7 @@ nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair) { struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - pqpair->sq_tail = pqpair->cq_head = 0; + pqpair->last_sq_tail = pqpair->sq_tail = pqpair->cq_head = 0; /* * First time through the completion queue, HW will set phase @@ -1206,7 +1210,9 @@ nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracke SPDK_ERRLOG("sq_tail is passing sq_head!\n"); } - nvme_pcie_qpair_ring_sq_doorbell(qpair); + if (!pqpair->delay_pcie_doorbell) { + nvme_pcie_qpair_ring_sq_doorbell(qpair); + } } static void @@ -1583,6 +1589,7 @@ nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, } pqpair->num_entries = opts->io_queue_size; + pqpair->delay_pcie_doorbell = opts->delay_pcie_doorbell; qpair = &pqpair->qpair; @@ -2118,6 +2125,13 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_ } } + if (pqpair->delay_pcie_doorbell) { + if (pqpair->last_sq_tail != pqpair->sq_tail) { + nvme_pcie_qpair_ring_sq_doorbell(qpair); + pqpair->last_sq_tail = pqpair->sq_tail; + } + } + if (spdk_unlikely(ctrlr->timeout_enabled)) { /* * User registered for timeout callback