diff --git a/examples/accel/perf/accel_perf.c b/examples/accel/perf/accel_perf.c index d9c184ec8..75c269460 100644 --- a/examples/accel/perf/accel_perf.c +++ b/examples/accel/perf/accel_perf.c @@ -44,7 +44,6 @@ #define DATA_PATTERN 0x5a #define ALIGN_4K 0x1000 -static bool g_using_sw_engine = false; static uint64_t g_tsc_rate; static uint64_t g_tsc_end; static int g_rc; @@ -84,13 +83,11 @@ struct ap_task { void *dst2; uint32_t crc_dst; struct worker_thread *worker; - int status; int expected_status; /* used for the compare operation */ TAILQ_ENTRY(ap_task) link; }; struct accel_batch { - int status; int cmd_count; struct spdk_accel_batch *batch; struct worker_thread *worker; @@ -493,7 +490,6 @@ _free_task_buffers(struct ap_task *task) } } -static void _batch_done(void *cb_arg); static void _build_batch(struct worker_thread *worker, struct ap_task *task) { @@ -562,16 +558,17 @@ _drain_batch(struct worker_thread *worker) } static void -_batch_done(void *cb_arg) +batch_done(void *arg1, int status) { - struct accel_batch *worker_batch = (struct accel_batch *)cb_arg; + struct accel_batch *worker_batch = (struct accel_batch *)arg1; struct worker_thread *worker = worker_batch->worker; int rc; + assert(worker); assert(TAILQ_EMPTY(&worker->in_use_batches) == 0); - if (worker_batch->status) { - SPDK_ERRLOG("error %d\n", worker_batch->status); + if (status) { + SPDK_ERRLOG("error %d\n", status); } worker->current_queue_depth--; @@ -603,17 +600,6 @@ _batch_done(void *cb_arg) } } -static void -batch_done(void *cb_arg, int status) -{ - struct accel_batch *worker_batch = (struct accel_batch *)cb_arg; - - assert(worker_batch->worker); - - worker_batch->status = status; - spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch); -} - static int _vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt) { @@ -637,7 +623,7 @@ _vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt) } static void -_accel_done(void *arg1) +accel_done(void *arg1, int status) { struct ap_task *task = arg1; struct worker_thread *worker = task->worker; @@ -646,7 +632,7 @@ _accel_done(void *arg1) assert(worker); assert(worker->current_queue_depth > 0); - if (g_verify && task->status == 0) { + if (g_verify && status == 0) { switch (g_workload_selection) { case ACCEL_COPY_CRC32C: sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); @@ -697,9 +683,9 @@ _accel_done(void *arg1) } if (task->expected_status == -EILSEQ) { - assert(task->status != 0); + assert(status != 0); worker->injected_miscompares++; - } else if (task->status) { + } else if (status) { /* Expected to pass but the accel engine reported an error (ex: COMPARE operation). */ worker->xfer_failed++; } @@ -978,22 +964,6 @@ error: spdk_app_stop(-1); } -static void -accel_done(void *cb_arg, int status) -{ - struct ap_task *task = (struct ap_task *)cb_arg; - struct worker_thread *worker = task->worker; - - assert(worker); - - task->status = status; - if (g_using_sw_engine == false) { - _accel_done(task); - } else { - spdk_thread_send_msg(worker->thread, _accel_done, task); - } -} - static inline void identify_accel_engine_usage(void) { @@ -1005,7 +975,6 @@ identify_accel_engine_usage(void) capabilities = spdk_accel_get_capabilities(ch); if ((capabilities & g_workload_selection) != g_workload_selection) { - g_using_sw_engine = true; SPDK_WARNLOG("The selected workload is not natively supported by the current engine\n"); SPDK_WARNLOG("The software engine will be used instead.\n\n"); } diff --git a/include/spdk_internal/accel_engine.h b/include/spdk_internal/accel_engine.h index f307e9cbb..5a72dacb3 100644 --- a/include/spdk_internal/accel_engine.h +++ b/include/spdk_internal/accel_engine.h @@ -53,6 +53,11 @@ struct accel_io_channel { TAILQ_HEAD(, spdk_accel_batch) batches; }; +struct sw_accel_io_channel { + struct spdk_poller *completion_poller; + TAILQ_HEAD(, spdk_accel_task) tasks_to_complete; +}; + struct spdk_accel_batch { /* Lists of commands in the batch. */ TAILQ_HEAD(, spdk_accel_task) hw_tasks; @@ -104,8 +109,8 @@ struct spdk_accel_task { uint32_t *crc_dst; enum accel_opcode op_code; uint64_t nbytes; + int status; TAILQ_ENTRY(spdk_accel_task) link; - uint8_t offload_ctx[0]; /* Not currently used. */ }; struct spdk_accel_engine { diff --git a/lib/accel/Makefile b/lib/accel/Makefile index 4492949ad..85430a4cb 100644 --- a/lib/accel/Makefile +++ b/lib/accel/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 6 +SO_VER := 7 SO_MINOR := 0 SO_SUFFIX := $(SO_VER).$(SO_MINOR) diff --git a/lib/accel/accel_engine.c b/lib/accel/accel_engine.c index 4dde595e8..a6b7a1dfa 100644 --- a/lib/accel/accel_engine.c +++ b/lib/accel/accel_engine.c @@ -184,6 +184,17 @@ _get_task(struct accel_io_channel *accel_ch, struct spdk_accel_batch *batch, return accel_task; } +/* Post SW completions to a list and complete in a poller as we don't want to + * complete them on the caller's stack as they'll likely submit another. */ +inline static void +_add_to_comp_list(struct accel_io_channel *accel_ch, struct spdk_accel_task *accel_task, int status) +{ + struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(accel_ch->engine_ch); + + accel_task->status = status; + TAILQ_INSERT_TAIL(&sw_ch->tasks_to_complete, accel_task, link); +} + /* Accel framework public API for copy function */ int spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes, @@ -206,7 +217,7 @@ spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_ return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { _sw_accel_copy(dst, src, nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); return 0; } } @@ -239,7 +250,7 @@ spdk_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, v return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { _sw_accel_dualcast(dst1, dst2, src, nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); return 0; } } @@ -267,7 +278,7 @@ spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, ui return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { rc = _sw_accel_compare(src1, src2, nbytes); - spdk_accel_task_complete(accel_task, rc); + _add_to_comp_list(accel_ch, accel_task, rc); return 0; } } @@ -294,7 +305,7 @@ spdk_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, uint return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { _sw_accel_fill(dst, fill, nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); return 0; } } @@ -323,7 +334,7 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *crc_dst, void *sr return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { _sw_accel_crc32c(crc_dst, src, seed, nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); return 0; } } @@ -405,7 +416,7 @@ spdk_accel_submit_crc32cv(struct spdk_io_channel *ch, uint32_t *crc_dst, struct return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { _sw_accel_crc32cv(crc_dst, iov, iov_cnt, seed); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); return 0; } } @@ -437,7 +448,7 @@ spdk_accel_submit_copy_crc32c(struct spdk_io_channel *ch, void *dst, void *src, } else { _sw_accel_copy(dst, src, nbytes); _sw_accel_crc32c(crc_dst, src, seed, nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); return 0; } } @@ -493,7 +504,7 @@ spdk_accel_submit_copy_crc32cv(struct spdk_io_channel *ch, void *dst, struct iov } else { _sw_accel_copy(dst, src_iovs[0].iov_base, src_iovs[0].iov_len); _sw_accel_crc32cv(crc_dst, src_iovs, iov_cnt, seed); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); return 0; } } @@ -826,15 +837,15 @@ spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *bat switch (accel_task->op_code) { case ACCEL_OPCODE_MEMMOVE: _sw_accel_copy(accel_task->dst, accel_task->src, accel_task->nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); break; case ACCEL_OPCODE_MEMFILL: _sw_accel_fill(accel_task->dst, accel_task->fill_pattern, accel_task->nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); break; case ACCEL_OPCODE_COMPARE: rc = _sw_accel_compare(accel_task->src, accel_task->src2, accel_task->nbytes); - spdk_accel_task_complete(accel_task, rc); + _add_to_comp_list(accel_ch, accel_task, rc); batch->status |= rc; break; case ACCEL_OPCODE_CRC32C: @@ -844,17 +855,17 @@ spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *bat } else { _sw_accel_crc32cv(accel_task->crc_dst, accel_task->v.iovs, accel_task->v.iovcnt, accel_task->seed); } - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); break; case ACCEL_OPCODE_COPY_CRC32C: _sw_accel_copy(accel_task->dst, accel_task->src, accel_task->nbytes); _sw_accel_crc32c(accel_task->crc_dst, accel_task->src, accel_task->seed, accel_task->nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); break; case ACCEL_OPCODE_DUALCAST: _sw_accel_dualcast(accel_task->dst, accel_task->dst2, accel_task->src, accel_task->nbytes); - spdk_accel_task_complete(accel_task, 0); + _add_to_comp_list(accel_ch, accel_task, 0); break; default: assert(false); @@ -1120,15 +1131,45 @@ static struct spdk_accel_engine sw_accel_engine = { .batch_get_max = sw_accel_batch_get_max, }; +static int +accel_comp_poll(void *arg) +{ + struct sw_accel_io_channel *sw_ch = arg; + TAILQ_HEAD(, spdk_accel_task) tasks_to_complete; + struct spdk_accel_task *accel_task; + + if (TAILQ_EMPTY(&sw_ch->tasks_to_complete)) { + return SPDK_POLLER_IDLE; + } + + TAILQ_INIT(&tasks_to_complete); + TAILQ_SWAP(&tasks_to_complete, &sw_ch->tasks_to_complete, spdk_accel_task, link); + + while ((accel_task = TAILQ_FIRST(&tasks_to_complete))) { + TAILQ_REMOVE(&tasks_to_complete, accel_task, link); + spdk_accel_task_complete(accel_task, accel_task->status); + } + + return SPDK_POLLER_BUSY; +} + static int sw_accel_create_cb(void *io_device, void *ctx_buf) { + struct sw_accel_io_channel *sw_ch = ctx_buf; + + TAILQ_INIT(&sw_ch->tasks_to_complete); + sw_ch->completion_poller = SPDK_POLLER_REGISTER(accel_comp_poll, sw_ch, 0); + return 0; } static void sw_accel_destroy_cb(void *io_device, void *ctx_buf) { + struct sw_accel_io_channel *sw_ch = ctx_buf; + + spdk_poller_unregister(&sw_ch->completion_poller); } static struct spdk_io_channel *sw_accel_get_io_channel(void) @@ -1147,7 +1188,7 @@ sw_accel_engine_init(void) { accel_sw_register(&sw_accel_engine); spdk_io_device_register(&sw_accel_engine, sw_accel_create_cb, sw_accel_destroy_cb, - 0, "sw_accel_engine"); + sizeof(struct sw_accel_io_channel), "sw_accel_engine"); return 0; }