diff --git a/lib/accel/accel.c b/lib/accel/accel.c index ed02f3d5d..01e4f6350 100644 --- a/lib/accel/accel.c +++ b/lib/accel/accel.c @@ -815,11 +815,89 @@ accel_sequence_task_cb(void *cb_arg, int status) accel_process_sequence(seq); } +static bool +accel_compare_iovs(struct iovec *iova, uint32_t iovacnt, struct iovec *iovb, uint32_t iovbcnt) +{ + /* For now, just do a dumb check that the iovecs arrays are exactly the same */ + if (iovacnt != iovbcnt) { + return false; + } + + return memcmp(iova, iovb, sizeof(*iova) * iovacnt) == 0; +} + +static void +accel_sequence_merge_tasks(struct spdk_accel_sequence *seq, struct spdk_accel_task *task, + struct spdk_accel_task **next_task) +{ + struct spdk_accel_task *next = *next_task; + + switch (task->op_code) { + case ACCEL_OPC_COPY: + /* We only allow changing src of operations that actually have a src, e.g. we never + * do it for fill. Theoretically, it is possible, but we'd have to be careful to + * change the src of the operation after fill (which in turn could also be a fill). + * So, for the sake of simplicity, skip this type of operations for now. + */ + if (next->op_code != ACCEL_OPC_DECOMPRESS && + next->op_code != ACCEL_OPC_COPY) { + break; + } + if (task->dst_domain != next->src_domain) { + break; + } + if (!accel_compare_iovs(task->d.iovs, task->d.iovcnt, + next->s.iovs, next->s.iovcnt)) { + break; + } + next->s.iovs = task->s.iovs; + next->s.iovcnt = task->s.iovcnt; + next->src_domain = task->src_domain; + TAILQ_REMOVE(&seq->tasks, task, seq_link); + TAILQ_INSERT_TAIL(&seq->completed, task, seq_link); + break; + case ACCEL_OPC_DECOMPRESS: + /* We can only merge tasks when one of them is a copy */ + if (next->op_code != ACCEL_OPC_COPY) { + break; + } + if (task->dst_domain != next->src_domain) { + break; + } + if (!accel_compare_iovs(task->d.iovs, task->d.iovcnt, + next->s.iovs, next->s.iovcnt)) { + break; + } + task->d.iovs = next->d.iovs; + task->d.iovcnt = next->d.iovcnt; + task->dst_domain = next->dst_domain; + /* We're removing next_task from the tasks queue, so we need to update its pointer, + * so that the TAILQ_FOREACH_SAFE() loop below works correctly */ + *next_task = TAILQ_NEXT(next, seq_link); + TAILQ_REMOVE(&seq->tasks, next, seq_link); + TAILQ_INSERT_TAIL(&seq->completed, next, seq_link); + break; + case ACCEL_OPC_FILL: + break; + default: + assert(0 && "bad opcode"); + break; + } +} + int spdk_accel_sequence_finish(struct spdk_accel_sequence *seq, spdk_accel_completion_cb cb_fn, void *cb_arg) { - struct spdk_accel_task *task; + struct spdk_accel_task *task, *next; + + /* Try to remove any copy operations if possible */ + TAILQ_FOREACH_SAFE(task, &seq->tasks, seq_link, next) { + if (next == NULL) { + break; + } + accel_sequence_merge_tasks(seq, task, &next); + } /* Since we store copy operations' buffers as iovecs, we need to convert them to scalar * buffers, as that's what accel modules expect diff --git a/test/unit/lib/accel/accel.c/accel_ut.c b/test/unit/lib/accel/accel.c/accel_ut.c index a4bc359aa..40e9003ae 100644 --- a/test/unit/lib/accel/accel.c/accel_ut.c +++ b/test/unit/lib/accel/accel.c/accel_ut.c @@ -893,6 +893,11 @@ test_sequence_append_error(void) struct ut_sequence_operation { int complete_status; int submit_status; + int count; + struct iovec *src_iovs; + uint32_t src_iovcnt; + struct iovec *dst_iovs; + uint32_t dst_iovcnt; }; static struct ut_sequence_operation g_seq_operations[ACCEL_OPC_LAST]; @@ -902,6 +907,18 @@ ut_sequnce_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task { struct ut_sequence_operation *op = &g_seq_operations[task->op_code]; + if (op->src_iovs != NULL) { + CU_ASSERT_EQUAL(task->s.iovcnt, op->src_iovcnt); + CU_ASSERT_EQUAL(memcmp(task->s.iovs, op->src_iovs, + sizeof(struct iovec) * op->src_iovcnt), 0); + } + if (op->dst_iovs != NULL) { + CU_ASSERT_EQUAL(task->d.iovcnt, op->dst_iovcnt); + CU_ASSERT_EQUAL(memcmp(task->d.iovs, op->dst_iovs, + sizeof(struct iovec) * op->dst_iovcnt), 0); + } + + op->count++; if (op->submit_status != 0) { return op->submit_status; } @@ -1192,6 +1209,269 @@ test_sequence_decompress(void) } #endif +static void +test_sequence_copy_elision(void) +{ + struct spdk_accel_sequence *seq = NULL; + struct spdk_io_channel *ioch; + struct ut_sequence ut_seq; + struct iovec src_iovs[4], dst_iovs[4], exp_iovs[2]; + char buf[4096], tmp[4][4096]; + struct spdk_accel_module_if *modules[ACCEL_OPC_LAST]; + int i, rc, completed; + + ioch = spdk_accel_get_io_channel(); + SPDK_CU_ASSERT_FATAL(ioch != NULL); + + /* Override the submit_tasks function */ + g_module.submit_tasks = ut_sequnce_submit_tasks; + for (i = 0; i < ACCEL_OPC_LAST; ++i) { + g_seq_operations[i].complete_status = 0; + g_seq_operations[i].submit_status = 0; + g_seq_operations[i].count = 0; + + modules[i] = g_modules_opc[i]; + g_modules_opc[i] = &g_module; + } + + /* Check that a copy operation at the beginning is removed */ + seq = NULL; + completed = 0; + g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovcnt = 1; + g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovcnt = 1; + g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0]; + g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1]; + exp_iovs[0].iov_base = tmp[0]; + exp_iovs[0].iov_len = sizeof(tmp[0]); + exp_iovs[1].iov_base = buf; + exp_iovs[1].iov_len = 2048; + + dst_iovs[0].iov_base = tmp[1]; + dst_iovs[0].iov_len = sizeof(tmp[1]); + src_iovs[0].iov_base = tmp[0]; + src_iovs[0].iov_len = sizeof(tmp[0]); + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL, + &src_iovs[0], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[1].iov_base = buf; + dst_iovs[1].iov_len = 2048; + src_iovs[1].iov_base = tmp[1]; + src_iovs[1].iov_len = sizeof(tmp[1]); + rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[1], 1, NULL, NULL, + &src_iovs[1], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + ut_seq.complete = false; + rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq); + CU_ASSERT_EQUAL(rc, 0); + + poll_threads(); + + CU_ASSERT_EQUAL(completed, 2); + CU_ASSERT(ut_seq.complete); + CU_ASSERT_EQUAL(ut_seq.status, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1); + + /* Check that a copy operation at the end is removed too */ + seq = NULL; + completed = 0; + g_seq_operations[ACCEL_OPC_COPY].count = 0; + g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0; + g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0]; + g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1]; + exp_iovs[0].iov_base = tmp[0]; + exp_iovs[0].iov_len = sizeof(tmp[0]); + exp_iovs[1].iov_base = buf; + exp_iovs[1].iov_len = 2048; + + dst_iovs[0].iov_base = tmp[1]; + dst_iovs[0].iov_len = 2048; + src_iovs[0].iov_base = tmp[0]; + src_iovs[0].iov_len = sizeof(tmp[0]); + rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[0], 1, NULL, NULL, + &src_iovs[0], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[1].iov_base = buf; + dst_iovs[1].iov_len = 2048; + src_iovs[1].iov_base = tmp[1]; + src_iovs[1].iov_len = 2048; + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL, + &src_iovs[1], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + ut_seq.complete = false; + rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq); + CU_ASSERT_EQUAL(rc, 0); + + poll_threads(); + + CU_ASSERT_EQUAL(completed, 2); + CU_ASSERT(ut_seq.complete); + CU_ASSERT_EQUAL(ut_seq.status, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1); + + /* Check a copy operation both at the beginning and the end */ + seq = NULL; + completed = 0; + g_seq_operations[ACCEL_OPC_COPY].count = 0; + g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0; + g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0]; + g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1]; + exp_iovs[0].iov_base = tmp[0]; + exp_iovs[0].iov_len = sizeof(tmp[0]); + exp_iovs[1].iov_base = buf; + exp_iovs[1].iov_len = 2048; + + dst_iovs[0].iov_base = tmp[1]; + dst_iovs[0].iov_len = sizeof(tmp[1]); + src_iovs[0].iov_base = tmp[0]; + src_iovs[0].iov_len = sizeof(tmp[0]); + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL, + &src_iovs[0], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[1].iov_base = tmp[2]; + dst_iovs[1].iov_len = 2048; + src_iovs[1].iov_base = tmp[1]; + src_iovs[1].iov_len = sizeof(tmp[1]); + rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[1], 1, NULL, NULL, + &src_iovs[1], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[2].iov_base = buf; + dst_iovs[2].iov_len = 2048; + src_iovs[2].iov_base = tmp[2]; + src_iovs[2].iov_len = 2048; + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[2], 1, NULL, NULL, + &src_iovs[2], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + ut_seq.complete = false; + rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq); + CU_ASSERT_EQUAL(rc, 0); + + poll_threads(); + + CU_ASSERT_EQUAL(completed, 3); + CU_ASSERT(ut_seq.complete); + CU_ASSERT_EQUAL(ut_seq.status, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1); + + /* Check decompress + copy + decompress + copy */ + seq = NULL; + completed = 0; + g_seq_operations[ACCEL_OPC_COPY].count = 0; + g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0; + g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = NULL; + g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = NULL; + + dst_iovs[0].iov_base = tmp[1]; + dst_iovs[0].iov_len = sizeof(tmp[1]); + src_iovs[0].iov_base = tmp[0]; + src_iovs[0].iov_len = sizeof(tmp[0]); + rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[0], 1, NULL, NULL, + &src_iovs[0], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[1].iov_base = tmp[2]; + dst_iovs[1].iov_len = 2048; + src_iovs[1].iov_base = tmp[1]; + src_iovs[1].iov_len = sizeof(tmp[1]); + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL, + &src_iovs[1], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[2].iov_base = tmp[3]; + dst_iovs[2].iov_len = 1024; + src_iovs[2].iov_base = tmp[2]; + src_iovs[2].iov_len = 2048; + rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[2], 1, NULL, NULL, + &src_iovs[2], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[3].iov_base = buf; + dst_iovs[3].iov_len = 1024; + src_iovs[3].iov_base = tmp[3]; + src_iovs[3].iov_len = 1024; + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[3], 1, NULL, NULL, + &src_iovs[3], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + ut_seq.complete = false; + rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq); + CU_ASSERT_EQUAL(rc, 0); + + poll_threads(); + + CU_ASSERT_EQUAL(completed, 4); + CU_ASSERT(ut_seq.complete); + CU_ASSERT_EQUAL(ut_seq.status, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 2); + + /* Check two copy operations - one of them should be removed, while the other should be + * executed normally */ + seq = NULL; + completed = 0; + g_seq_operations[ACCEL_OPC_COPY].count = 0; + + dst_iovs[0].iov_base = tmp[1]; + dst_iovs[0].iov_len = sizeof(tmp[1]); + src_iovs[0].iov_base = tmp[0]; + src_iovs[0].iov_len = sizeof(tmp[0]); + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL, + &src_iovs[0], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + dst_iovs[1].iov_base = buf; + dst_iovs[1].iov_len = sizeof(buf); + src_iovs[1].iov_base = tmp[1]; + src_iovs[1].iov_len = sizeof(tmp[1]); + rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL, + &src_iovs[1], 1, NULL, NULL, 0, + ut_sequence_step_cb, &completed); + CU_ASSERT_EQUAL(rc, 0); + + ut_seq.complete = false; + rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq); + CU_ASSERT_EQUAL(rc, 0); + + poll_threads(); + + CU_ASSERT_EQUAL(completed, 2); + CU_ASSERT(ut_seq.complete); + CU_ASSERT_EQUAL(ut_seq.status, 0); + CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 1); + + /* Cleanup module pointers to make subsequent tests work correctly */ + for (i = 0; i < ACCEL_OPC_LAST; ++i) { + g_modules_opc[i] = modules[i]; + } + + g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = NULL; + g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = NULL; + + spdk_put_io_channel(ioch); + poll_threads(); +} + static int test_sequence_setup(void) { @@ -1256,6 +1536,7 @@ main(int argc, char **argv) #ifdef SPDK_CONFIG_ISAL /* accel_sw requires isa-l for compression */ CU_ADD_TEST(seq_suite, test_sequence_decompress); #endif + CU_ADD_TEST(seq_suite, test_sequence_copy_elision); suite = CU_add_suite("accel", test_setup, test_cleanup); CU_ADD_TEST(suite, test_spdk_accel_task_complete);