accel: remove redundant copy operations
Operation sequence should always be treated as a whole, meaning that users cannot rely on the contents of any intermediate buffers and should only care about the buffer that's the destination of the whole operation. This allows us to remove some of those copy operations by changing source / destination buffer of a preceding / following operation. If a sequence is using buffers from non-local memory domain, users can append a copy operation to a sequence to specify a local destination buffer. If the module executing the operations is aware of memory domains, this can avoid doing an extra spdk_memory_domain_pull_data(). Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com> Change-Id: I93b94d46ee32700819e9e6f1c55350692db8a67a Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15530 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
This commit is contained in:
parent
59f55d23f2
commit
f778e8e53a
@ -815,11 +815,89 @@ accel_sequence_task_cb(void *cb_arg, int status)
|
||||
accel_process_sequence(seq);
|
||||
}
|
||||
|
||||
static bool
|
||||
accel_compare_iovs(struct iovec *iova, uint32_t iovacnt, struct iovec *iovb, uint32_t iovbcnt)
|
||||
{
|
||||
/* For now, just do a dumb check that the iovecs arrays are exactly the same */
|
||||
if (iovacnt != iovbcnt) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return memcmp(iova, iovb, sizeof(*iova) * iovacnt) == 0;
|
||||
}
|
||||
|
||||
static void
|
||||
accel_sequence_merge_tasks(struct spdk_accel_sequence *seq, struct spdk_accel_task *task,
|
||||
struct spdk_accel_task **next_task)
|
||||
{
|
||||
struct spdk_accel_task *next = *next_task;
|
||||
|
||||
switch (task->op_code) {
|
||||
case ACCEL_OPC_COPY:
|
||||
/* We only allow changing src of operations that actually have a src, e.g. we never
|
||||
* do it for fill. Theoretically, it is possible, but we'd have to be careful to
|
||||
* change the src of the operation after fill (which in turn could also be a fill).
|
||||
* So, for the sake of simplicity, skip this type of operations for now.
|
||||
*/
|
||||
if (next->op_code != ACCEL_OPC_DECOMPRESS &&
|
||||
next->op_code != ACCEL_OPC_COPY) {
|
||||
break;
|
||||
}
|
||||
if (task->dst_domain != next->src_domain) {
|
||||
break;
|
||||
}
|
||||
if (!accel_compare_iovs(task->d.iovs, task->d.iovcnt,
|
||||
next->s.iovs, next->s.iovcnt)) {
|
||||
break;
|
||||
}
|
||||
next->s.iovs = task->s.iovs;
|
||||
next->s.iovcnt = task->s.iovcnt;
|
||||
next->src_domain = task->src_domain;
|
||||
TAILQ_REMOVE(&seq->tasks, task, seq_link);
|
||||
TAILQ_INSERT_TAIL(&seq->completed, task, seq_link);
|
||||
break;
|
||||
case ACCEL_OPC_DECOMPRESS:
|
||||
/* We can only merge tasks when one of them is a copy */
|
||||
if (next->op_code != ACCEL_OPC_COPY) {
|
||||
break;
|
||||
}
|
||||
if (task->dst_domain != next->src_domain) {
|
||||
break;
|
||||
}
|
||||
if (!accel_compare_iovs(task->d.iovs, task->d.iovcnt,
|
||||
next->s.iovs, next->s.iovcnt)) {
|
||||
break;
|
||||
}
|
||||
task->d.iovs = next->d.iovs;
|
||||
task->d.iovcnt = next->d.iovcnt;
|
||||
task->dst_domain = next->dst_domain;
|
||||
/* We're removing next_task from the tasks queue, so we need to update its pointer,
|
||||
* so that the TAILQ_FOREACH_SAFE() loop below works correctly */
|
||||
*next_task = TAILQ_NEXT(next, seq_link);
|
||||
TAILQ_REMOVE(&seq->tasks, next, seq_link);
|
||||
TAILQ_INSERT_TAIL(&seq->completed, next, seq_link);
|
||||
break;
|
||||
case ACCEL_OPC_FILL:
|
||||
break;
|
||||
default:
|
||||
assert(0 && "bad opcode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
spdk_accel_sequence_finish(struct spdk_accel_sequence *seq,
|
||||
spdk_accel_completion_cb cb_fn, void *cb_arg)
|
||||
{
|
||||
struct spdk_accel_task *task;
|
||||
struct spdk_accel_task *task, *next;
|
||||
|
||||
/* Try to remove any copy operations if possible */
|
||||
TAILQ_FOREACH_SAFE(task, &seq->tasks, seq_link, next) {
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
accel_sequence_merge_tasks(seq, task, &next);
|
||||
}
|
||||
|
||||
/* Since we store copy operations' buffers as iovecs, we need to convert them to scalar
|
||||
* buffers, as that's what accel modules expect
|
||||
|
@ -893,6 +893,11 @@ test_sequence_append_error(void)
|
||||
struct ut_sequence_operation {
|
||||
int complete_status;
|
||||
int submit_status;
|
||||
int count;
|
||||
struct iovec *src_iovs;
|
||||
uint32_t src_iovcnt;
|
||||
struct iovec *dst_iovs;
|
||||
uint32_t dst_iovcnt;
|
||||
};
|
||||
|
||||
static struct ut_sequence_operation g_seq_operations[ACCEL_OPC_LAST];
|
||||
@ -902,6 +907,18 @@ ut_sequnce_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task
|
||||
{
|
||||
struct ut_sequence_operation *op = &g_seq_operations[task->op_code];
|
||||
|
||||
if (op->src_iovs != NULL) {
|
||||
CU_ASSERT_EQUAL(task->s.iovcnt, op->src_iovcnt);
|
||||
CU_ASSERT_EQUAL(memcmp(task->s.iovs, op->src_iovs,
|
||||
sizeof(struct iovec) * op->src_iovcnt), 0);
|
||||
}
|
||||
if (op->dst_iovs != NULL) {
|
||||
CU_ASSERT_EQUAL(task->d.iovcnt, op->dst_iovcnt);
|
||||
CU_ASSERT_EQUAL(memcmp(task->d.iovs, op->dst_iovs,
|
||||
sizeof(struct iovec) * op->dst_iovcnt), 0);
|
||||
}
|
||||
|
||||
op->count++;
|
||||
if (op->submit_status != 0) {
|
||||
return op->submit_status;
|
||||
}
|
||||
@ -1192,6 +1209,269 @@ test_sequence_decompress(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
test_sequence_copy_elision(void)
|
||||
{
|
||||
struct spdk_accel_sequence *seq = NULL;
|
||||
struct spdk_io_channel *ioch;
|
||||
struct ut_sequence ut_seq;
|
||||
struct iovec src_iovs[4], dst_iovs[4], exp_iovs[2];
|
||||
char buf[4096], tmp[4][4096];
|
||||
struct spdk_accel_module_if *modules[ACCEL_OPC_LAST];
|
||||
int i, rc, completed;
|
||||
|
||||
ioch = spdk_accel_get_io_channel();
|
||||
SPDK_CU_ASSERT_FATAL(ioch != NULL);
|
||||
|
||||
/* Override the submit_tasks function */
|
||||
g_module.submit_tasks = ut_sequnce_submit_tasks;
|
||||
for (i = 0; i < ACCEL_OPC_LAST; ++i) {
|
||||
g_seq_operations[i].complete_status = 0;
|
||||
g_seq_operations[i].submit_status = 0;
|
||||
g_seq_operations[i].count = 0;
|
||||
|
||||
modules[i] = g_modules_opc[i];
|
||||
g_modules_opc[i] = &g_module;
|
||||
}
|
||||
|
||||
/* Check that a copy operation at the beginning is removed */
|
||||
seq = NULL;
|
||||
completed = 0;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovcnt = 1;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovcnt = 1;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0];
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1];
|
||||
exp_iovs[0].iov_base = tmp[0];
|
||||
exp_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
exp_iovs[1].iov_base = buf;
|
||||
exp_iovs[1].iov_len = 2048;
|
||||
|
||||
dst_iovs[0].iov_base = tmp[1];
|
||||
dst_iovs[0].iov_len = sizeof(tmp[1]);
|
||||
src_iovs[0].iov_base = tmp[0];
|
||||
src_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
|
||||
&src_iovs[0], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[1].iov_base = buf;
|
||||
dst_iovs[1].iov_len = 2048;
|
||||
src_iovs[1].iov_base = tmp[1];
|
||||
src_iovs[1].iov_len = sizeof(tmp[1]);
|
||||
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
|
||||
&src_iovs[1], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
ut_seq.complete = false;
|
||||
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT_EQUAL(completed, 2);
|
||||
CU_ASSERT(ut_seq.complete);
|
||||
CU_ASSERT_EQUAL(ut_seq.status, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1);
|
||||
|
||||
/* Check that a copy operation at the end is removed too */
|
||||
seq = NULL;
|
||||
completed = 0;
|
||||
g_seq_operations[ACCEL_OPC_COPY].count = 0;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0];
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1];
|
||||
exp_iovs[0].iov_base = tmp[0];
|
||||
exp_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
exp_iovs[1].iov_base = buf;
|
||||
exp_iovs[1].iov_len = 2048;
|
||||
|
||||
dst_iovs[0].iov_base = tmp[1];
|
||||
dst_iovs[0].iov_len = 2048;
|
||||
src_iovs[0].iov_base = tmp[0];
|
||||
src_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
|
||||
&src_iovs[0], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[1].iov_base = buf;
|
||||
dst_iovs[1].iov_len = 2048;
|
||||
src_iovs[1].iov_base = tmp[1];
|
||||
src_iovs[1].iov_len = 2048;
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
|
||||
&src_iovs[1], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
ut_seq.complete = false;
|
||||
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT_EQUAL(completed, 2);
|
||||
CU_ASSERT(ut_seq.complete);
|
||||
CU_ASSERT_EQUAL(ut_seq.status, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1);
|
||||
|
||||
/* Check a copy operation both at the beginning and the end */
|
||||
seq = NULL;
|
||||
completed = 0;
|
||||
g_seq_operations[ACCEL_OPC_COPY].count = 0;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = &exp_iovs[0];
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = &exp_iovs[1];
|
||||
exp_iovs[0].iov_base = tmp[0];
|
||||
exp_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
exp_iovs[1].iov_base = buf;
|
||||
exp_iovs[1].iov_len = 2048;
|
||||
|
||||
dst_iovs[0].iov_base = tmp[1];
|
||||
dst_iovs[0].iov_len = sizeof(tmp[1]);
|
||||
src_iovs[0].iov_base = tmp[0];
|
||||
src_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
|
||||
&src_iovs[0], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[1].iov_base = tmp[2];
|
||||
dst_iovs[1].iov_len = 2048;
|
||||
src_iovs[1].iov_base = tmp[1];
|
||||
src_iovs[1].iov_len = sizeof(tmp[1]);
|
||||
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
|
||||
&src_iovs[1], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[2].iov_base = buf;
|
||||
dst_iovs[2].iov_len = 2048;
|
||||
src_iovs[2].iov_base = tmp[2];
|
||||
src_iovs[2].iov_len = 2048;
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[2], 1, NULL, NULL,
|
||||
&src_iovs[2], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
ut_seq.complete = false;
|
||||
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT_EQUAL(completed, 3);
|
||||
CU_ASSERT(ut_seq.complete);
|
||||
CU_ASSERT_EQUAL(ut_seq.status, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 1);
|
||||
|
||||
/* Check decompress + copy + decompress + copy */
|
||||
seq = NULL;
|
||||
completed = 0;
|
||||
g_seq_operations[ACCEL_OPC_COPY].count = 0;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].count = 0;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = NULL;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = NULL;
|
||||
|
||||
dst_iovs[0].iov_base = tmp[1];
|
||||
dst_iovs[0].iov_len = sizeof(tmp[1]);
|
||||
src_iovs[0].iov_base = tmp[0];
|
||||
src_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
|
||||
&src_iovs[0], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[1].iov_base = tmp[2];
|
||||
dst_iovs[1].iov_len = 2048;
|
||||
src_iovs[1].iov_base = tmp[1];
|
||||
src_iovs[1].iov_len = sizeof(tmp[1]);
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
|
||||
&src_iovs[1], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[2].iov_base = tmp[3];
|
||||
dst_iovs[2].iov_len = 1024;
|
||||
src_iovs[2].iov_base = tmp[2];
|
||||
src_iovs[2].iov_len = 2048;
|
||||
rc = spdk_accel_append_decompress(&seq, ioch, &dst_iovs[2], 1, NULL, NULL,
|
||||
&src_iovs[2], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[3].iov_base = buf;
|
||||
dst_iovs[3].iov_len = 1024;
|
||||
src_iovs[3].iov_base = tmp[3];
|
||||
src_iovs[3].iov_len = 1024;
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[3], 1, NULL, NULL,
|
||||
&src_iovs[3], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
ut_seq.complete = false;
|
||||
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT_EQUAL(completed, 4);
|
||||
CU_ASSERT(ut_seq.complete);
|
||||
CU_ASSERT_EQUAL(ut_seq.status, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_DECOMPRESS].count, 2);
|
||||
|
||||
/* Check two copy operations - one of them should be removed, while the other should be
|
||||
* executed normally */
|
||||
seq = NULL;
|
||||
completed = 0;
|
||||
g_seq_operations[ACCEL_OPC_COPY].count = 0;
|
||||
|
||||
dst_iovs[0].iov_base = tmp[1];
|
||||
dst_iovs[0].iov_len = sizeof(tmp[1]);
|
||||
src_iovs[0].iov_base = tmp[0];
|
||||
src_iovs[0].iov_len = sizeof(tmp[0]);
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[0], 1, NULL, NULL,
|
||||
&src_iovs[0], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
dst_iovs[1].iov_base = buf;
|
||||
dst_iovs[1].iov_len = sizeof(buf);
|
||||
src_iovs[1].iov_base = tmp[1];
|
||||
src_iovs[1].iov_len = sizeof(tmp[1]);
|
||||
rc = spdk_accel_append_copy(&seq, ioch, &dst_iovs[1], 1, NULL, NULL,
|
||||
&src_iovs[1], 1, NULL, NULL, 0,
|
||||
ut_sequence_step_cb, &completed);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
ut_seq.complete = false;
|
||||
rc = spdk_accel_sequence_finish(seq, ut_sequence_complete_cb, &ut_seq);
|
||||
CU_ASSERT_EQUAL(rc, 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT_EQUAL(completed, 2);
|
||||
CU_ASSERT(ut_seq.complete);
|
||||
CU_ASSERT_EQUAL(ut_seq.status, 0);
|
||||
CU_ASSERT_EQUAL(g_seq_operations[ACCEL_OPC_COPY].count, 1);
|
||||
|
||||
/* Cleanup module pointers to make subsequent tests work correctly */
|
||||
for (i = 0; i < ACCEL_OPC_LAST; ++i) {
|
||||
g_modules_opc[i] = modules[i];
|
||||
}
|
||||
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].src_iovs = NULL;
|
||||
g_seq_operations[ACCEL_OPC_DECOMPRESS].dst_iovs = NULL;
|
||||
|
||||
spdk_put_io_channel(ioch);
|
||||
poll_threads();
|
||||
}
|
||||
|
||||
static int
|
||||
test_sequence_setup(void)
|
||||
{
|
||||
@ -1256,6 +1536,7 @@ main(int argc, char **argv)
|
||||
#ifdef SPDK_CONFIG_ISAL /* accel_sw requires isa-l for compression */
|
||||
CU_ADD_TEST(seq_suite, test_sequence_decompress);
|
||||
#endif
|
||||
CU_ADD_TEST(seq_suite, test_sequence_copy_elision);
|
||||
|
||||
suite = CU_add_suite("accel", test_setup, test_cleanup);
|
||||
CU_ADD_TEST(suite, test_spdk_accel_task_complete);
|
||||
|
Loading…
Reference in New Issue
Block a user