From 4662e22a99e70144507bc4714fc5bd60cb4a6625 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Tue, 8 Dec 2015 10:34:30 -0700 Subject: [PATCH] ioat: allocate descriptor rings in parallel arrays Rather than individually allocating each ring entry, use two large allocations, one for the hardware descriptors and one for the software descriptor contexts. This allows the use of simple array indexing on the rings and also allows the removal of most of the software descriptor structure, since the necessary information can be retrieved based on the ring index now. Change-Id: I73ef24450f69ca0fc35e350286282c6b1c77a207 Signed-off-by: Daniel Verkamp --- lib/ioat/ioat.c | 108 ++++++++++++++------------------------- lib/ioat/ioat_internal.h | 14 ++--- 2 files changed, 40 insertions(+), 82 deletions(-) diff --git a/lib/ioat/ioat.c b/lib/ioat/ioat.c index 9166248b6..d00e2b604 100644 --- a/lib/ioat/ioat.c +++ b/lib/ioat/ioat.c @@ -199,44 +199,28 @@ ioat_get_ring_space(struct ioat_channel *ioat) return (1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1; } -static struct ioat_descriptor * -ioat_alloc_ring_entry(struct ioat_channel *ioat) +static uint32_t +ioat_get_ring_index(struct ioat_channel *ioat, uint32_t index) { - struct ioat_descriptor *desc; - struct ioat_dma_hw_descriptor *hw_desc; - - desc = malloc(sizeof(*desc)); - if (desc == NULL) { - return NULL; - } - hw_desc = ioat_zmalloc(NULL, sizeof(*hw_desc), 64, &desc->hw_desc_bus_addr); - if (hw_desc == NULL) { - free(desc); - return NULL; - } - if (desc->hw_desc_bus_addr == 0) { - free(desc); - ioat_free(hw_desc); - return NULL; - } - desc->u.dma = hw_desc; - - return desc; + return index & ((1 << ioat->ring_size_order) - 1); } static void -ioat_free_ring_entry(struct ioat_channel *ioat, struct ioat_descriptor *desc) +ioat_get_ring_entry(struct ioat_channel *ioat, uint32_t index, + struct ioat_descriptor **desc, + struct ioat_dma_hw_descriptor **hw_desc) { - if (desc) { - ioat_free(desc->u.dma); - free(desc); - } + uint32_t i = ioat_get_ring_index(ioat, index); + + *desc = &ioat->ring[i]; + *hw_desc = &ioat->hw_ring[i]; } -static struct ioat_descriptor * -ioat_get_ring_entry(struct ioat_channel *ioat, uint32_t index) +static uint64_t +ioat_get_desc_phys_addr(struct ioat_channel *ioat, uint32_t index) { - return ioat->ring[index % (1 << ioat->ring_size_order)]; + return ioat->hw_ring_phys_addr + + ioat_get_ring_index(ioat, index) * sizeof(struct ioat_dma_hw_descriptor); } static void @@ -261,8 +245,7 @@ ioat_prep_null(struct ioat_channel *ioat) return NULL; } - desc = ioat_get_ring_entry(ioat, ioat->head); - hw_desc = desc->u.dma; + ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc); hw_desc->u.control_raw = 0; hw_desc->u.control.op = IOAT_OP_COPY; @@ -294,8 +277,7 @@ ioat_prep_copy(struct ioat_channel *ioat, uint64_t dst, return NULL; } - desc = ioat_get_ring_entry(ioat, ioat->head); - hw_desc = desc->u.dma; + ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc); hw_desc->u.control_raw = 0; hw_desc->u.control.op = IOAT_OP_COPY; @@ -361,7 +343,8 @@ static void ioat_process_channel_events(struct ioat_channel *ioat) { struct ioat_descriptor *desc; - uint64_t status, completed_descriptor; + uint64_t status, completed_descriptor, hw_desc_phys_addr; + uint32_t tail; if (ioat->head == ioat->tail) { return; @@ -380,21 +363,19 @@ ioat_process_channel_events(struct ioat_channel *ioat) return; } - while (1) { - desc = ioat_get_ring_entry(ioat, ioat->tail); + do { + tail = ioat_get_ring_index(ioat, ioat->tail); + desc = &ioat->ring[tail]; if (desc->callback_fn) { desc->callback_fn(desc->callback_arg); } + hw_desc_phys_addr = ioat_get_desc_phys_addr(ioat, ioat->tail); ioat->tail++; + } while (hw_desc_phys_addr != completed_descriptor); - if (desc->hw_desc_bus_addr == completed_descriptor) - break; - - } - - ioat->last_seen = desc->hw_desc_bus_addr; + ioat->last_seen = hw_desc_phys_addr; } static int @@ -403,13 +384,13 @@ ioat_channel_destruct(struct ioat_channel *ioat) ioat_unmap_pci_bar(ioat); if (ioat->ring) { - int i; - for (i = 0; i < (1 << ioat->ring_size_order); i++) - ioat_free_ring_entry(ioat, ioat->ring[i]); - free(ioat->ring); } + if (ioat->hw_ring) { + ioat_free(ioat->hw_ring); + } + if (ioat->comp_update) { ioat_free((void *)ioat->comp_update); ioat->comp_update = NULL; @@ -421,12 +402,9 @@ ioat_channel_destruct(struct ioat_channel *ioat) static int ioat_channel_start(struct ioat_channel *ioat) { - struct ioat_descriptor **ring; uint8_t xfercap, version; uint64_t status; int i, num_descriptors; - struct ioat_descriptor *next; - struct ioat_dma_hw_descriptor *dma_hw_desc; uint64_t comp_update_bus_addr; if (ioat_map_pci_bar(ioat) != 0) { @@ -466,33 +444,21 @@ ioat_channel_start(struct ioat_channel *ioat) num_descriptors = 1 << ioat->ring_size_order; - ioat->ring = calloc(num_descriptors, sizeof(*ring)); - + ioat->ring = calloc(num_descriptors, sizeof(struct ioat_descriptor)); if (!ioat->ring) { - ioat_channel_destruct(ioat); return -1; } - ring = ioat->ring; + ioat->hw_ring = ioat_zmalloc(NULL, num_descriptors * sizeof(struct ioat_dma_hw_descriptor), 64, + &ioat->hw_ring_phys_addr); + if (!ioat->hw_ring) { + return -1; + } for (i = 0; i < num_descriptors; i++) { - ring[i] = ioat_alloc_ring_entry(ioat); - - if (!ring[i]) { - ioat_channel_destruct(ioat); - return -1; - } + ioat->hw_ring[i].next = ioat_get_desc_phys_addr(ioat, i + 1); } - for (i = 0; i < num_descriptors - 1; i++) { - next = ring[i + 1]; - dma_hw_desc = ring[i]->u.dma; - - dma_hw_desc->next = next->hw_desc_bus_addr; - } - - ring[i]->u.dma->next = ring[0]->hw_desc_bus_addr; - ioat->head = 0; ioat->tail = 0; ioat->last_seen = 0; @@ -501,7 +467,7 @@ ioat_channel_start(struct ioat_channel *ioat) ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN; ioat_write_chancmp(ioat, comp_update_bus_addr); - ioat_write_chainaddr(ioat, ring[0]->hw_desc_bus_addr); + ioat_write_chainaddr(ioat, ioat->hw_ring_phys_addr); ioat_prep_null(ioat); ioat_flush(ioat); @@ -519,7 +485,6 @@ ioat_channel_start(struct ioat_channel *ioat) } else { ioat_printf(ioat, "%s: could not start channel: status = %p\n error = %#x\n", __func__, (void *)status, ioat->regs->chanerr); - ioat_channel_destruct(ioat); return -1; } @@ -546,6 +511,7 @@ ioat_attach(void *device) ioat->device = device; if (ioat_channel_start(ioat) != 0) { + ioat_channel_destruct(ioat); free(ioat); return NULL; } diff --git a/lib/ioat/ioat_internal.h b/lib/ioat/ioat_internal.h index e66605e54..eea1d8a1a 100644 --- a/lib/ioat/ioat_internal.h +++ b/lib/ioat/ioat_internal.h @@ -52,16 +52,6 @@ struct ioat_descriptor { ioat_callback_t callback_fn; void *callback_arg; - union { - struct ioat_dma_hw_descriptor *dma; - struct ioat_fill_hw_descriptor *fill; - struct ioat_xor_hw_descriptor *xor; - struct ioat_xor_ext_hw_descriptor *xor_ext; - struct ioat_pq_hw_descriptor *pq; - struct ioat_pq_ext_hw_descriptor *pq_ext; - struct ioat_raw_hw_descriptor *raw; - } u; - uint64_t hw_desc_bus_addr; }; /* One of these per allocated PCI device. */ @@ -81,7 +71,9 @@ struct ioat_channel { uint32_t ring_size_order; uint64_t last_seen; - struct ioat_descriptor **ring; + struct ioat_descriptor *ring; + struct ioat_dma_hw_descriptor *hw_ring; + uint64_t hw_ring_phys_addr; }; static inline uint32_t