accel: remove redundant copy operations

Operation sequence should always be treated as a whole, meaning that
users cannot rely on the contents of any intermediate buffers and should
only care about the buffer that's the destination of the whole
operation.  This allows us to remove some of those copy operations by
changing source / destination buffer of a preceding / following
operation.

If a sequence is using buffers from non-local memory domain, users can
append a copy operation to a sequence to specify a local destination
buffer.  If the module executing the operations is aware of memory
domains, this can avoid doing an extra spdk_memory_domain_pull_data().

Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Change-Id: I93b94d46ee32700819e9e6f1c55350692db8a67a
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15530
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
This commit is contained in:
Konrad Sztyber 2022-11-17 18:39:58 +01:00 committed by Tomasz Zawadzki
parent 59f55d23f2
commit f778e8e53a
2 changed files with 360 additions and 1 deletions

View File

@ -815,11 +815,89 @@ accel_sequence_task_cb(void *cb_arg, int status)
accel_process_sequence(seq); accel_process_sequence(seq);
} }
static bool
accel_compare_iovs(struct iovec *iova, uint32_t iovacnt, struct iovec *iovb, uint32_t iovbcnt)
{
/* For now, just do a dumb check that the iovecs arrays are exactly the same */
if (iovacnt != iovbcnt) {
return false;
}
return memcmp(iova, iovb, sizeof(*iova) * iovacnt) == 0;
}
static void
accel_sequence_merge_tasks(struct spdk_accel_sequence *seq, struct spdk_accel_task *task,
struct spdk_accel_task **next_task)
{
struct spdk_accel_task *next = *next_task;
switch (task->op_code) {
case ACCEL_OPC_COPY:
/* We only allow changing src of operations that actually have a src, e.g. we never
* do it for fill. Theoretically, it is possible, but we'd have to be careful to
* change the src of the operation after fill (which in turn could also be a fill).
* So, for the sake of simplicity, skip this type of operations for now.
*/
if (next->op_code != ACCEL_OPC_DECOMPRESS &&
next->op_code != ACCEL_OPC_COPY) {
break;
}
if (task->dst_domain != next->src_domain) {
break;
}
if (!accel_compare_iovs(task->d.iovs, task->d.iovcnt,
next->s.iovs, next->s.iovcnt)) {
break;
}
next->s.iovs = task->s.iovs;
next->s.iovcnt = task->s.iovcnt;
next->src_domain = task->src_domain;
TAILQ_REMOVE(&seq->tasks, task, seq_link);
TAILQ_INSERT_TAIL(&seq->completed, task, seq_link);
break;
case ACCEL_OPC_DECOMPRESS:
/* We can only merge tasks when one of them is a copy */
if (next->op_code != ACCEL_OPC_COPY) {
break;
}
if (task->dst_domain != next->src_domain) {
break;
}
if (!accel_compare_iovs(task->d.iovs, task->d.iovcnt,
next->s.iovs, next->s.iovcnt)) {
break;
}
task->d.iovs = next->d.iovs;
task->d.iovcnt = next->d.iovcnt;
task->dst_domain = next->dst_domain;
/* We're removing next_task from the tasks queue, so we need to update its pointer,
* so that the TAILQ_FOREACH_SAFE() loop below works correctly */
*next_task = TAILQ_NEXT(next, seq_link);
TAILQ_REMOVE(&seq->tasks, next, seq_link);
TAILQ_INSERT_TAIL(&seq->completed, next, seq_link);
break;
case ACCEL_OPC_FILL:
break;
default:
assert(0 && "bad opcode");
break;
}
}
int int
spdk_accel_sequence_finish(struct spdk_accel_sequence *seq, spdk_accel_sequence_finish(struct spdk_accel_sequence *seq,
spdk_accel_completion_cb cb_fn, void *cb_arg) spdk_accel_completion_cb cb_fn, void *cb_arg)
{ {
struct spdk_accel_task *task; struct spdk_accel_task *task, *next;
/* Try to remove any copy operations if possible */
TAILQ_FOREACH_SAFE(task, &seq->tasks, seq_link, next) {
if (next == NULL) {
break;
}
accel_sequence_merge_tasks(seq, task, &next);
}
/* Since we store copy operations' buffers as iovecs, we need to convert them to scalar /* Since we store copy operations' buffers as iovecs, we need to convert them to scalar
* buffers, as that's what accel modules expect * buffers, as that's what accel modules expect

View File

@ -893,6 +893,11 @@ test_sequence_append_error(void)
struct ut_sequence_operation { struct ut_sequence_operation {
int complete_status; int complete_status;
int submit_status; int submit_status;
int count;
struct iovec *src_iovs;
uint32_t src_iovcnt;
struct iovec *dst_iovs;
uint32_t dst_iovcnt;
}; };
static struct ut_sequence_operation g_seq_operations[ACCEL_OPC_LAST]; static struct ut_sequence_operation g_seq_operations[ACCEL_OPC_LAST];
@ -902,6 +907,18 @@ ut_sequnce_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task
{ {
struct ut_sequence_operation *op = &g_seq_operations[task->op_code]; struct ut_sequence_operation *op = &g_seq_operations[task->op_code];
if (op->src_iovs != NULL) {
CU_ASSERT_EQUAL(task->s.iovcnt, op->src_iovcnt);
CU_ASSERT_EQUAL(memcmp(task->s.iovs, op->src_iovs,
sizeof(struct iovec) * op->src_iovcnt), 0);
}
if (op->dst_iovs != NULL) {
CU_ASSERT_EQUAL(task->d.iovcnt, op->dst_iovcnt);
CU_ASSERT_EQUAL(memcmp(task->d.iovs, op->dst_iovs,
sizeof(struct iovec) * op->dst_iovcnt), 0);
}
op->count++;
if (op->submit_status != 0) { if (op->submit_status != 0) {
return op->submit_status; return op->submit_status;
} }
@ -1192,6 +1209,269 @@ test_sequence_decompress(void)
} }
#endif #endif
static void
test_sequence_copy_elision(void)
{
struct spdk_accel_sequence *seq = NULL;
struct spdk_io_channel *ioch;
struct ut_sequence ut_seq;
struct iovec src_iovs[4], dst_iovs[4], exp_iovs[2];
char buf[4096], tmp[4][4096];
struct spdk_accel_module_if *modules[ACCEL_OPC_LAST];
int i, rc, completed;
ioch = spdk_accel_get_io_channel();
SPDK_CU_ASSERT_FATAL(ioch != NULL);
/* Override the submit_tasks function */
g_module.submit_tasks = ut_sequnce_submit_tasks;
for (i = 0; i < ACCEL_OPC_LAST; ++i) {
g_seq_operations[i].complete_status = 0;
g_seq_operations[i].submit_status = 0;
g_seq_operations[i].count = 0;
modules[i] = g_modules_opc[i];
g_modules_opc[i] = &g_module;
}
/* Check that a copy operation at the beginning is removed */
seq = NULL;
completed = 0;
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovcnt = 1;
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovcnt = 1;
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0];
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1];
exp_iovs[0].iov_base = tmp[0];
exp_iovs[0].iov_len = sizeof(tmp[0]);
exp_iovs[1].iov_base = buf;
exp_iovs[1].iov_len = 2048;
dst_iovs[0].iov_base = tmp[1];
dst_iovs[0].iov_len = sizeof(tmp[1]);
src_iovs[0].iov_base = tmp[0];
src_iovs[0].iov_len = sizeof(tmp[0]);
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
&src_iovs[0], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[1].iov_base = buf;
dst_iovs[1].iov_len = 2048;
src_iovs[1].iov_base = tmp[1];
src_iovs[1].iov_len = sizeof(tmp[1]);
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
&src_iovs[1], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
ut_seq.complete = false;
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
CU_ASSERT_EQUAL(rc, 0);
poll_threads();
CU_ASSERT_EQUAL(completed, 2);
CU_ASSERT(ut_seq.complete);
CU_ASSERT_EQUAL(ut_seq.status, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1);
/* Check that a copy operation at the end is removed too */
seq = NULL;
completed = 0;
g_seq_operations[ACCEL_OPC_COPY].count = 0;
g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0;
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0];
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1];
exp_iovs[0].iov_base = tmp[0];
exp_iovs[0].iov_len = sizeof(tmp[0]);
exp_iovs[1].iov_base = buf;
exp_iovs[1].iov_len = 2048;
dst_iovs[0].iov_base = tmp[1];
dst_iovs[0].iov_len = 2048;
src_iovs[0].iov_base = tmp[0];
src_iovs[0].iov_len = sizeof(tmp[0]);
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
&src_iovs[0], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[1].iov_base = buf;
dst_iovs[1].iov_len = 2048;
src_iovs[1].iov_base = tmp[1];
src_iovs[1].iov_len = 2048;
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
&src_iovs[1], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
ut_seq.complete = false;
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
CU_ASSERT_EQUAL(rc, 0);
poll_threads();
CU_ASSERT_EQUAL(completed, 2);
CU_ASSERT(ut_seq.complete);
CU_ASSERT_EQUAL(ut_seq.status, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1);
/* Check a copy operation both at the beginning and the end */
seq = NULL;
completed = 0;
g_seq_operations[ACCEL_OPC_COPY].count = 0;
g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0;
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0];
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1];
exp_iovs[0].iov_base = tmp[0];
exp_iovs[0].iov_len = sizeof(tmp[0]);
exp_iovs[1].iov_base = buf;
exp_iovs[1].iov_len = 2048;
dst_iovs[0].iov_base = tmp[1];
dst_iovs[0].iov_len = sizeof(tmp[1]);
src_iovs[0].iov_base = tmp[0];
src_iovs[0].iov_len = sizeof(tmp[0]);
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
&src_iovs[0], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[1].iov_base = tmp[2];
dst_iovs[1].iov_len = 2048;
src_iovs[1].iov_base = tmp[1];
src_iovs[1].iov_len = sizeof(tmp[1]);
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
&src_iovs[1], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[2].iov_base = buf;
dst_iovs[2].iov_len = 2048;
src_iovs[2].iov_base = tmp[2];
src_iovs[2].iov_len = 2048;
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[2], 1, NULL, NULL,
&src_iovs[2], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
ut_seq.complete = false;
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
CU_ASSERT_EQUAL(rc, 0);
poll_threads();
CU_ASSERT_EQUAL(completed, 3);
CU_ASSERT(ut_seq.complete);
CU_ASSERT_EQUAL(ut_seq.status, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1);
/* Check decompress + copy + decompress + copy */
seq = NULL;
completed = 0;
g_seq_operations[ACCEL_OPC_COPY].count = 0;
g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0;
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = NULL;
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = NULL;
dst_iovs[0].iov_base = tmp[1];
dst_iovs[0].iov_len = sizeof(tmp[1]);
src_iovs[0].iov_base = tmp[0];
src_iovs[0].iov_len = sizeof(tmp[0]);
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
&src_iovs[0], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[1].iov_base = tmp[2];
dst_iovs[1].iov_len = 2048;
src_iovs[1].iov_base = tmp[1];
src_iovs[1].iov_len = sizeof(tmp[1]);
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
&src_iovs[1], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[2].iov_base = tmp[3];
dst_iovs[2].iov_len = 1024;
src_iovs[2].iov_base = tmp[2];
src_iovs[2].iov_len = 2048;
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[2], 1, NULL, NULL,
&src_iovs[2], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[3].iov_base = buf;
dst_iovs[3].iov_len = 1024;
src_iovs[3].iov_base = tmp[3];
src_iovs[3].iov_len = 1024;
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[3], 1, NULL, NULL,
&src_iovs[3], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
ut_seq.complete = false;
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
CU_ASSERT_EQUAL(rc, 0);
poll_threads();
CU_ASSERT_EQUAL(completed, 4);
CU_ASSERT(ut_seq.complete);
CU_ASSERT_EQUAL(ut_seq.status, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 2);
/* Check two copy operations - one of them should be removed, while the other should be
* executed normally */
seq = NULL;
completed = 0;
g_seq_operations[ACCEL_OPC_COPY].count = 0;
dst_iovs[0].iov_base = tmp[1];
dst_iovs[0].iov_len = sizeof(tmp[1]);
src_iovs[0].iov_base = tmp[0];
src_iovs[0].iov_len = sizeof(tmp[0]);
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
&src_iovs[0], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
dst_iovs[1].iov_base = buf;
dst_iovs[1].iov_len = sizeof(buf);
src_iovs[1].iov_base = tmp[1];
src_iovs[1].iov_len = sizeof(tmp[1]);
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
&src_iovs[1], 1, NULL, NULL, 0,
ut_sequence_step_cb, &completed);
CU_ASSERT_EQUAL(rc, 0);
ut_seq.complete = false;
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
CU_ASSERT_EQUAL(rc, 0);
poll_threads();
CU_ASSERT_EQUAL(completed, 2);
CU_ASSERT(ut_seq.complete);
CU_ASSERT_EQUAL(ut_seq.status, 0);
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 1);
/* Cleanup module pointers to make subsequent tests work correctly */
for (i = 0; i < ACCEL_OPC_LAST; ++i) {
g_modules_opc[i] = modules[i];
}
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = NULL;
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = NULL;
spdk_put_io_channel(ioch);
poll_threads();
}
static int static int
test_sequence_setup(void) test_sequence_setup(void)
{ {
@ -1256,6 +1536,7 @@ main(int argc, char **argv)
#ifdef SPDK_CONFIG_ISAL /* accel_sw requires isa-l for compression */ #ifdef SPDK_CONFIG_ISAL /* accel_sw requires isa-l for compression */
CU_ADD_TEST(seq_suite, test_sequence_decompress); CU_ADD_TEST(seq_suite, test_sequence_decompress);
#endif #endif
CU_ADD_TEST(seq_suite, test_sequence_copy_elision);
suite = CU_add_suite("accel", test_setup, test_cleanup); suite = CU_add_suite("accel", test_setup, test_cleanup);
CU_ADD_TEST(suite, test_spdk_accel_task_complete); CU_ADD_TEST(suite, test_spdk_accel_task_complete);