nvme: add tracker prefetching in completion path

At 10M IO/s, we see a lot of CPU cycles wasted getting the next tracker into cache. If we only get one completion at a time, this is unavoidable, but when there are multiple completions pending, we can prefetch the second tracker while processing the completion for the first.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I9de702bee3719e4494eec6f05b09be3672f1e0ac
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/456097
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
2019-05-28 08:52:36 -07:00 · 2019-05-28 08:52:36 -07:00 · 6c820f84cb
commit 6c820f84cb
parent f2d46446ca
1 changed file with 19 additions and 2 deletions
--- a/lib/nvme/nvme_pcie.c
+++ b/lib/nvme/nvme_pcie.c
@@ -2048,9 +2048,12 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
 {
 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
 	struct nvme_tracker	*tr;
-	struct spdk_nvme_cpl	*cpl;
+	struct spdk_nvme_cpl	*cpl, *next_cpl;
 	uint32_t		 num_completions = 0;
 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
+	uint16_t		 next_cq_head;
+	uint8_t			 next_phase;
+	bool			 next_is_valid = false;

 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
@@ -2068,9 +2071,23 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
 	while (1) {
 		cpl = &pqpair->cpl[pqpair->cq_head];

-		if (cpl->status.p != pqpair->flags.phase) {
+		if (!next_is_valid && cpl->status.p != pqpair->flags.phase) {
 			break;
 		}
+
+		if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) {
+			next_cq_head = pqpair->cq_head + 1;
+			next_phase = pqpair->flags.phase;
+		} else {
+			next_cq_head = 0;
+			next_phase = !pqpair->flags.phase;
+		}
+		next_cpl = &pqpair->cpl[next_cq_head];
+		next_is_valid = (next_cpl->status.p == next_phase);
+		if (next_is_valid) {
+			__builtin_prefetch(&pqpair->tr[next_cpl->cid]);
+		}
+
 #ifdef __PPC64__
 		/*
 		 * This memory barrier prevents reordering of: