From 8aa497f083979194f6a0a65107806f33bcfa3c6c Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Fri, 29 Jan 2016 16:01:43 +0800 Subject: [PATCH] spdk: Add block fill API to ioat driver For those Crystal Beach DMA channels which support block fill capability, we add a fill API here that can zero out pages or fill them with a fixed pattern. Change-Id: I8a57337702b951c703d494004b111f6d206279fb Signed-off-by: Changpeng Liu --- examples/ioat/verify/verify.c | 73 ++++++++++++++++++++---- include/spdk/ioat.h | 25 ++++++++ include/spdk/ioat_spec.h | 7 +++ lib/ioat/ioat.c | 104 ++++++++++++++++++++++++++++++++++ lib/ioat/ioat_internal.h | 1 + 5 files changed, 199 insertions(+), 11 deletions(-) diff --git a/examples/ioat/verify/verify.c b/examples/ioat/verify/verify.c index 95fc0e563..dcf4ff02e 100644 --- a/examples/ioat/verify/verify.c +++ b/examples/ioat/verify/verify.c @@ -49,6 +49,11 @@ #define SRC_BUFFER_SIZE (512*1024) +enum ioat_task_type { + IOAT_COPY_TYPE, + IOAT_FILL_TYPE, +}; + struct user_config { int queue_depth; int time_in_sec; @@ -67,6 +72,8 @@ static struct user_config g_user_config; struct thread_entry { uint64_t xfer_completed; uint64_t xfer_failed; + uint64_t fill_completed; + uint64_t fill_failed; uint64_t current_queue_depth; unsigned lcore_id; bool is_draining; @@ -75,9 +82,11 @@ struct thread_entry { }; struct ioat_task { + enum ioat_task_type type; struct thread_entry *thread_entry; void *buffer; int len; + uint64_t fill_pattern; void *src; void *dst; }; @@ -124,14 +133,29 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas int len; int src_offset; int dst_offset; + int num_ddwords; + uint64_t fill_pattern; - src_offset = rand_r(&seed) % SRC_BUFFER_SIZE; - len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset); - dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len); + if (ioat_task->type == IOAT_FILL_TYPE) { + fill_pattern = rand_r(&seed); + fill_pattern = fill_pattern << 32 | rand_r(&seed); - memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE); + /* ensure that the length of memset block is 8 Bytes aligned */ + num_ddwords = (rand_r(&seed) % SRC_BUFFER_SIZE) / 8; + len = num_ddwords * 8; + if (len < 8) + len = 8; + dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len); + ioat_task->fill_pattern = fill_pattern; + } else { + src_offset = rand_r(&seed) % SRC_BUFFER_SIZE; + len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset); + dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len); + + memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE); + ioat_task->src = g_src + src_offset; + } ioat_task->len = len; - ioat_task->src = g_src + src_offset; ioat_task->dst = ioat_task->buffer + dst_offset; ioat_task->thread_entry = thread_entry; } @@ -139,14 +163,31 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas static void ioat_done(void *cb_arg) { + uint64_t *value; + int i, failed = 0; struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; struct thread_entry *thread_entry = ioat_task->thread_entry; - if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) { - thread_entry->xfer_failed++; + if (ioat_task->type == IOAT_FILL_TYPE) { + value = (uint64_t *)ioat_task->dst; + for (i = 0; i < ioat_task->len / 8; i++) { + if (*value != ioat_task->fill_pattern) { + thread_entry->fill_failed++; + failed = 1; + break; + } + value++; + } + if (!failed) + thread_entry->fill_completed++; } else { - thread_entry->xfer_completed++; + if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) { + thread_entry->xfer_failed++; + } else { + 
thread_entry->xfer_completed++; + } } + thread_entry->current_queue_depth--; if (thread_entry->is_draining) { rte_mempool_put(thread_entry->data_pool, ioat_task->buffer); @@ -278,7 +319,10 @@ drain_xfers(struct thread_entry *thread_entry) static void submit_single_xfer(struct ioat_task *ioat_task) { - ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst, ioat_task->src, ioat_task->len); + if (ioat_task->type == IOAT_FILL_TYPE) + ioat_submit_fill(ioat_task, ioat_done, ioat_task->dst, ioat_task->fill_pattern, ioat_task->len); + else + ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst, ioat_task->src, ioat_task->len); ioat_task->thread_entry->current_queue_depth++; } @@ -290,6 +334,11 @@ submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth) rte_mempool_get(thread_entry->task_pool, (void **)&ioat_task); rte_mempool_get(thread_entry->data_pool, &(ioat_task->buffer)); + ioat_task->type = IOAT_COPY_TYPE; + if (ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED) { + if (queue_depth % 2) + ioat_task->type = IOAT_FILL_TYPE; + } prepare_ioat_task(thread_entry, ioat_task); submit_single_xfer(ioat_task); } @@ -397,10 +446,12 @@ dump_result(struct thread_entry *threads, int len) for (i = 0; i < len; i++) { struct thread_entry *t = &threads[i]; total_completed += t->xfer_completed; + total_completed += t->fill_completed; total_failed += t->xfer_failed; + total_failed += t->fill_failed; if (t->xfer_completed || t->xfer_failed) - printf("lcore = %d, success = %ld, failed = %ld \n", - t->lcore_id, t->xfer_completed, t->xfer_failed); + printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld \n", + t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed); } return total_failed ? 1 : 0; } diff --git a/include/spdk/ioat.h b/include/spdk/ioat.h index 3eec6b13a..07370bad3 100644 --- a/include/spdk/ioat.h +++ b/include/spdk/ioat.h @@ -90,6 +90,15 @@ void ioat_unregister_thread(void); int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn, void *dst, const void *src, uint64_t nbytes); +/** + * Submit a DMA engine memory fill request. + * + * Before submitting any requests on a thread, the thread must be registered + * using the \ref ioat_register_thread() function. + */ +int64_t ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn, + void *dst, uint64_t fill_pattern, uint64_t nbytes); + /** * Check for completed requests on the current thread. * @@ -100,4 +109,20 @@ int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn, */ int ioat_process_events(void); +/** + * DMA engine capability flags + */ +enum ioat_dma_capability_flags { + IOAT_ENGINE_COPY_SUPPORTED = 0x1, /**< The memory copy is supported */ + IOAT_ENGINE_FILL_SUPPORTED = 0x2, /**< The memory fill is supported */ +}; + +/** + * Get the DMA engine capabilities. + * + * Before submitting any requests on a thread, the thread must be registered + * using the \ref ioat_register_thread() function. 
+ */ +uint32_t ioat_get_dma_capabilities(void); + #endif diff --git a/include/spdk/ioat_spec.h b/include/spdk/ioat_spec.h index bb65b5bee..f01343f83 100644 --- a/include/spdk/ioat_spec.h +++ b/include/spdk/ioat_spec.h @@ -53,6 +53,13 @@ #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 #define IOAT_CHANCTRL_INT_REARM 0x0001 +/* DMA Channel Capabilities */ +#define IOAT_DMACAP_PB (1 << 0) +#define IOAT_DMACAP_DCA (1 << 4) +#define IOAT_DMACAP_BFILL (1 << 6) +#define IOAT_DMACAP_XOR (1 << 8) +#define IOAT_DMACAP_PQ (1 << 9) +#define IOAT_DMACAP_DMA_DIF (1 << 10) struct ioat_registers { uint8_t chancnt; diff --git a/lib/ioat/ioat.c b/lib/ioat/ioat.c index b90c90e57..b20b62575 100644 --- a/lib/ioat/ioat.c +++ b/lib/ioat/ioat.c @@ -295,6 +295,37 @@ ioat_prep_copy(struct ioat_channel *ioat, uint64_t dst, return desc; } +static struct ioat_descriptor * +ioat_prep_fill(struct ioat_channel *ioat, uint64_t dst, + uint64_t fill_pattern, uint32_t len) +{ + struct ioat_descriptor *desc; + union ioat_hw_descriptor *hw_desc; + + ioat_assert(len <= ioat->max_xfer_size); + + if (ioat_get_ring_space(ioat) < 1) { + return NULL; + } + + ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc); + + hw_desc->fill.u.control_raw = 0; + hw_desc->fill.u.control.op = IOAT_OP_FILL; + hw_desc->fill.u.control.completion_update = 1; + + hw_desc->fill.size = len; + hw_desc->fill.src_data = fill_pattern; + hw_desc->fill.dest_addr = dst; + + desc->callback_fn = NULL; + desc->callback_arg = NULL; + + ioat_submit_single(ioat); + + return desc; +} + static int ioat_reset_hw(struct ioat_channel *ioat) { int timeout; @@ -419,6 +450,10 @@ ioat_channel_start(struct ioat_channel *ioat) return -1; } + /* Always support DMA copy */ + ioat->dma_capabilities = IOAT_ENGINE_COPY_SUPPORTED; + if (ioat->regs->dmacapability & IOAT_DMACAP_BFILL) + ioat->dma_capabilities |= IOAT_ENGINE_FILL_SUPPORTED; xfercap = ioat->regs->xfercap; /* Only bits [4:0] are valid. */ @@ -663,6 +698,75 @@ ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn, return nbytes; } +int64_t +ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn, + void *dst, uint64_t fill_pattern, uint64_t nbytes) +{ + struct ioat_channel *ioat; + struct ioat_descriptor *last_desc = NULL; + uint64_t remaining, op_size; + uint64_t vdst; + uint32_t orig_head; + + ioat = ioat_thread_channel; + if (!ioat) { + return -1; + } + + if (!(ioat->dma_capabilities & IOAT_ENGINE_FILL_SUPPORTED)) { + ioat_printf(ioat, "Channel does not support memory fill\n"); + return -1; + } + + orig_head = ioat->head; + + vdst = (uint64_t)dst; + remaining = nbytes; + + while (remaining) { + op_size = remaining; + op_size = min(op_size, ioat->max_xfer_size); + remaining -= op_size; + + last_desc = ioat_prep_fill(ioat, + ioat_vtophys((void *)vdst), + fill_pattern, + op_size); + + if (remaining == 0 || last_desc == NULL) { + break; + } + + vdst += op_size; + } + + if (last_desc) { + last_desc->callback_fn = cb_fn; + last_desc->callback_arg = cb_arg; + } else { + /* + * Ran out of descriptors in the ring - reset head to leave things as they were + * in case we managed to fill out any descriptors. 
+ */ + ioat->head = orig_head; + return -1; + } + + ioat_flush(ioat); + return nbytes; +} + +uint32_t ioat_get_dma_capabilities(void) +{ + struct ioat_channel *ioat; + + ioat = ioat_thread_channel; + if (!ioat) { + return 0; + } + return ioat->dma_capabilities; +} + int ioat_process_events(void) { if (!ioat_thread_channel) { diff --git a/lib/ioat/ioat_internal.h b/lib/ioat/ioat_internal.h index 030d2780d..a2e3f2ccf 100644 --- a/lib/ioat/ioat_internal.h +++ b/lib/ioat/ioat_internal.h @@ -74,6 +74,7 @@ struct ioat_channel { struct ioat_descriptor *ring; union ioat_hw_descriptor *hw_ring; uint64_t hw_ring_phys_addr; + uint32_t dma_capabilities; }; static inline uint32_t
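
Usage sketch (not part of the patch): a minimal example of how a caller might drive the new fill API, using only the functions this patch adds or documents (ioat_get_dma_capabilities(), ioat_submit_fill(), ioat_process_events()). It assumes the channel has already been probed and attached, the calling thread has already been registered with ioat_register_thread(), and `buf` points to DMA-able memory such as an rte_mempool buffer (as in examples/ioat/verify); dma_zero_buffer() and fill_done_cb() are illustrative names, not part of the API.

#include <stdint.h>

#include "spdk/ioat.h"

static void
fill_done_cb(void *cb_arg)
{
        int *done = cb_arg;

        *done = 1;
}

/*
 * Zero `nbytes` of `buf` with the DMA engine.  Preconditions (see the doc
 * comments above): the calling thread is registered and `buf` is DMA-able.
 */
static int
dma_zero_buffer(void *buf, uint64_t nbytes)
{
        int done = 0;

        if (!(ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED)) {
                /* Channel lacks block fill; caller should fall back to memset(). */
                return -1;
        }

        /*
         * A fill_pattern of 0 zeroes the destination; the verify example
         * keeps the length a multiple of 8 bytes.
         */
        if (ioat_submit_fill(&done, fill_done_cb, buf, 0, nbytes) < 0) {
                return -1;
        }

        /* Poll the channel until the completion callback fires. */
        while (!done) {
                ioat_process_events();
        }

        return 0;
}

The capability check mirrors the one submit_xfers() performs before choosing IOAT_FILL_TYPE, so the sketch degrades gracefully on channels whose DMACAP register does not set IOAT_DMACAP_BFILL.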