From 850cd9008245481ba4730ab2b2d3e9ff1425bae5 Mon Sep 17 00:00:00 2001 From: paul luse Date: Thu, 22 Sep 2022 12:01:56 -0700 Subject: [PATCH] accel/idxd/iaa: Convert to use iovecs In prep for upcoming iovec based compression/decompression patches. Signed-off-by: paul luse Change-Id: I413493f764bead9e56266e488b74f8bca979e225 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14633 Reviewed-by: Ben Walker Reviewed-by: Jim Harris Tested-by: SPDK CI Jenkins --- examples/accel/perf/accel_perf.c | 20 +++-- include/spdk/accel.h | 28 ++++--- include/spdk/idxd.h | 6 +- include/spdk_internal/accel_module.h | 4 + lib/accel/accel.c | 29 ++++--- lib/accel/accel_sw.c | 120 +++++++++++++++++++++++---- lib/idxd/Makefile | 2 +- lib/idxd/idxd.c | 12 +-- module/accel/iaa/accel_iaa.c | 23 +++-- 9 files changed, 170 insertions(+), 74 deletions(-) diff --git a/examples/accel/perf/accel_perf.c b/examples/accel/perf/accel_perf.c index aa06b9f87..171b90345 100644 --- a/examples/accel/perf/accel_perf.c +++ b/examples/accel/perf/accel_perf.c @@ -28,7 +28,7 @@ static int g_allocate_depth = 0; static int g_threads_per_core = 1; static int g_time_in_sec = 5; static uint32_t g_crc32c_seed = 0; -static uint32_t g_crc32c_chained_count = 1; +static uint32_t g_chained_count = 1; static int g_fail_percent_goal = 0; static uint8_t g_fill_pattern = 255; static bool g_verify = false; @@ -51,6 +51,8 @@ struct ap_task { void *src; struct iovec *src_iovs; uint32_t src_iovcnt; + struct iovec *dst_iovs; + uint32_t dst_iovcnt; void *dst; void *dst2; uint32_t crc_dst; @@ -94,7 +96,6 @@ dump_user_config(void) printf("Workload Type: %s\n", g_workload_type); if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { printf("CRC-32C seed: %u\n", g_crc32c_seed); - printf("vector count %u\n", g_crc32c_chained_count); } else if (g_workload_selection == ACCEL_OPC_FILL) { printf("Fill pattern: 0x%x\n", g_fill_pattern); } else if ((g_workload_selection == ACCEL_OPC_COMPARE) && 
g_fail_percent_goal > 0) { @@ -102,10 +103,11 @@ dump_user_config(void) } if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { printf("Vector size: %u bytes\n", g_xfer_size_bytes); - printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_crc32c_chained_count); + printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_chained_count); } else { printf("Transfer size: %u bytes\n", g_xfer_size_bytes); } + printf("vector count %u\n", g_chained_count); printf("Module: %s\n", module_name); printf("Queue depth: %u\n", g_queue_depth); printf("Allocate depth: %u\n", g_allocate_depth); @@ -120,7 +122,7 @@ usage(void) printf("accel_perf options:\n"); printf("\t[-h help message]\n"); printf("\t[-q queue depth per core]\n"); - printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)\n"); + printf("\t[-C for supported workloads, use this value to configure the io vector size to test (default 1)\n"); printf("\t[-T number of threads per core\n"); printf("\t[-n number of channels]\n"); printf("\t[-o transfer size in bytes]\n"); @@ -165,7 +167,7 @@ parse_args(int argc, char *argv) g_allocate_depth = argval; break; case 'C': - g_crc32c_chained_count = argval; + g_chained_count = argval; break; case 'f': g_fill_pattern = (uint8_t)argval; @@ -251,8 +253,8 @@ _get_task_data_bufs(struct ap_task *task) } if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { - assert(g_crc32c_chained_count > 0); - task->src_iovcnt = g_crc32c_chained_count; + assert(g_chained_count > 0); + task->src_iovcnt = g_chained_count; task->src_iovs = calloc(task->src_iovcnt, sizeof(struct iovec)); if (!task->src_iovs) { fprintf(stderr, "cannot allocated task->src_iovs fot task=%p\n", task); @@ -260,7 +262,7 @@ _get_task_data_bufs(struct ap_task *task) } if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { - dst_buff_len = g_xfer_size_bytes * g_crc32c_chained_count; + dst_buff_len = g_xfer_size_bytes * g_chained_count; } for (i 
= 0; i < task->src_iovcnt; i++) { @@ -750,7 +752,7 @@ main(int argc, char **argv) } if ((g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) && - g_crc32c_chained_count == 0) { + g_chained_count == 0) { usage(); g_rc = -1; goto cleanup; diff --git a/include/spdk/accel.h b/include/spdk/accel.h index 5f042a832..4840daf6a 100644 --- a/include/spdk/accel.h +++ b/include/spdk/accel.h @@ -221,10 +221,10 @@ int spdk_accel_submit_copy_crc32cv(struct spdk_io_channel *ch, void *dst, struct * This function will build the compress descriptor and submit it. * * \param ch I/O channel associated with this call - * \param dst Destination to compress to. - * \param src Source to read from. - * \param nbytes_dst Length in bytes of output buffer. - * \param nbytes_src Length in bytes of input buffer. + * \param dst Destination to write the data to. + * \param nbytes Length in bytes. + * \param src_iovs The io vector array which stores the src data and len. + * \param src_iovcnt The size of the src io vectors. * \param output_size The size of the compressed data * \param flags Flags, optional flags that can vary per operation. * \param cb_fn Callback function which will be called when the request is complete. @@ -233,9 +233,10 @@ int spdk_accel_submit_copy_crc32cv(struct spdk_io_channel *ch, void *dst, struct * * \return 0 on success, negative errno on failure. */ -int spdk_accel_submit_compress(struct spdk_io_channel *ch, void *dst, void *src, - uint64_t nbytes_dst, uint64_t nbytes_src, uint32_t *output_size, - int flags, spdk_accel_completion_cb cb_fn, void *cb_arg); +int spdk_accel_submit_compress(struct spdk_io_channel *ch, void *dst, + uint64_t nbytes, struct iovec *src_iovs, + size_t src_iovcnt, uint32_t *output_size, int flags, + spdk_accel_completion_cb cb_fn, void *cb_arg); /** * Build and submit a memory decompress request. 
@@ -243,10 +244,10 @@ int spdk_accel_submit_compress(struct spdk_io_channel *ch, void *dst, void *src, * This function will build the decompress descriptor and submit it. * * \param ch I/O channel associated with this call - * \param dst Destination. Must be large enough to hold decompressed data. - * \param src Source to read from. - * \param nbytes_dst Length in bytes of output buffer. - * \param nbytes_src Length in bytes of input buffer. + * \param dst_iovs The io vector array which stores the dst data and len. + * \param dst_iovcnt The size of the dst io vectors. + * \param src_iovs The io vector array which stores the src data and len. + * \param src_iovcnt The size of the src io vectors. * \param flags Flags, optional flags that can vary per operation. * \param cb_fn Callback function which will be called when the request is complete. * \param cb_arg Opaque value which will be passed back as the arg parameter in @@ -254,8 +255,9 @@ int spdk_accel_submit_compress(struct spdk_io_channel *ch, void *dst, void *src, * * \return 0 on success, negative errno on failure. */ -int spdk_accel_submit_decompress(struct spdk_io_channel *ch, void *dst, void *src, - uint64_t nbytes_dst, uint64_t nbytes_src, int flags, +int spdk_accel_submit_decompress(struct spdk_io_channel *ch, struct iovec *dst_iovs, + size_t dst_iovcnt, struct iovec *src_iovs, + size_t src_iovcnt, int flags, spdk_accel_completion_cb cb_fn, void *cb_arg); /** diff --git a/include/spdk/idxd.h b/include/spdk/idxd.h index 4015c2eb5..7f836a574 100644 --- a/include/spdk/idxd.h +++ b/include/spdk/idxd.h @@ -265,8 +265,8 @@ int spdk_idxd_submit_copy_crc32c(struct spdk_idxd_io_channel *chan, * by writing to the proper device portal. * * \param chan IDXD channel to submit request. - * \param diov Destination iovec. diov with diovcnt must be large enough to hold compressed data. - * \param diovcnt Number of elements in diov for decompress buffer. + * \param dst Destination to write the compressed data to. 
+ * \param nbytes Length in bytes. The dst buffer should be large enough to hold the compressed data. * \param siov Source iovec * \param siovcnt Number of elements in siov * \param output_size The size of the compressed data @@ -278,7 +278,7 @@ int spdk_idxd_submit_copy_crc32c(struct spdk_idxd_io_channel *chan, * \return 0 on success, negative errno on failure. */ int spdk_idxd_submit_compress(struct spdk_idxd_io_channel *chan, - struct iovec *diov, uint32_t diovcnt, + void *dst, uint64_t nbytes, struct iovec *siov, uint32_t siovcnt, uint32_t *output_size, int flags, spdk_idxd_req_cb cb_fn, void *cb_arg); diff --git a/include/spdk_internal/accel_module.h b/include/spdk_internal/accel_module.h index 1ffe2d17e..6790a6bb4 100644 --- a/include/spdk_internal/accel_module.h +++ b/include/spdk_internal/accel_module.h @@ -28,6 +28,10 @@ struct spdk_accel_task { void *src; }; union { + struct { + struct iovec *iovs; /* iovs passed by the caller */ + uint32_t iovcnt; /* iovcnt passed by the caller */ + } d; void *dst; void *src2; }; diff --git a/lib/accel/accel.c b/lib/accel/accel.c index dcc4e0835..f6adffa15 100644 --- a/lib/accel/accel.c +++ b/lib/accel/accel.c @@ -388,25 +388,31 @@ spdk_accel_submit_copy_crc32cv(struct spdk_io_channel *ch, void *dst, } int -spdk_accel_submit_compress(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes_dst, - uint64_t nbytes_src, uint32_t *output_size, int flags, +spdk_accel_submit_compress(struct spdk_io_channel *ch, void *dst, uint64_t nbytes, + struct iovec *src_iovs, size_t src_iovcnt, uint32_t *output_size, int flags, spdk_accel_completion_cb cb_fn, void *cb_arg) { struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); struct spdk_accel_task *accel_task; struct spdk_accel_module_if *module = g_modules_opc[ACCEL_OPC_COMPRESS]; struct spdk_io_channel *module_ch = accel_ch->module_ch[ACCEL_OPC_COMPRESS]; + size_t i, src_len = 0; accel_task = _get_task(accel_ch, cb_fn, cb_arg); if (accel_task == NULL) { return 
-ENOMEM; } + for (i = 0; i < src_iovcnt; i++) { + src_len += src_iovs[i].iov_len; + } + + accel_task->nbytes = src_len; accel_task->output_size = output_size; - accel_task->src = src; + accel_task->s.iovs = src_iovs; + accel_task->s.iovcnt = src_iovcnt; accel_task->dst = dst; - accel_task->nbytes = nbytes_src; - accel_task->nbytes_dst = nbytes_dst; + accel_task->nbytes_dst = nbytes; accel_task->flags = flags; accel_task->op_code = ACCEL_OPC_COMPRESS; @@ -416,8 +422,9 @@ spdk_accel_submit_compress(struct spdk_io_channel *ch, void *dst, void *src, uin } int -spdk_accel_submit_decompress(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes_dst, - uint64_t nbytes_src, int flags, spdk_accel_completion_cb cb_fn, void *cb_arg) +spdk_accel_submit_decompress(struct spdk_io_channel *ch, struct iovec *dst_iovs, + size_t dst_iovcnt, struct iovec *src_iovs, size_t src_iovcnt, + int flags, spdk_accel_completion_cb cb_fn, void *cb_arg) { struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); struct spdk_accel_task *accel_task; @@ -429,10 +436,10 @@ spdk_accel_submit_decompress(struct spdk_io_channel *ch, void *dst, void *src, u return -ENOMEM; } - accel_task->src = src; - accel_task->dst = dst; - accel_task->nbytes = nbytes_src; - accel_task->nbytes_dst = nbytes_dst; + accel_task->s.iovs = src_iovs; + accel_task->s.iovcnt = src_iovcnt; + accel_task->d.iovs = dst_iovs; + accel_task->d.iovcnt = dst_iovcnt; accel_task->flags = flags; accel_task->op_code = ACCEL_OPC_DECOMPRESS; diff --git a/lib/accel/accel_sw.c b/lib/accel/accel_sw.c index da23bd5e2..768699e63 100644 --- a/lib/accel/accel_sw.c +++ b/lib/accel/accel_sw.c @@ -175,18 +175,74 @@ static int _sw_accel_compress(struct sw_accel_io_channel *sw_ch, struct spdk_accel_task *accel_task) { #ifdef SPDK_CONFIG_ISAL - sw_ch->stream.next_in = accel_task->src; - sw_ch->stream.next_out = accel_task->dst; - sw_ch->stream.avail_in = accel_task->nbytes; - sw_ch->stream.avail_out = accel_task->nbytes_dst; + size_t 
last_seglen = accel_task->s.iovs[accel_task->s.iovcnt - 1].iov_len; + struct iovec *siov = accel_task->s.iovs; + struct iovec *diov = accel_task->d.iovs; + size_t remaining = accel_task->nbytes; + uint32_t s = 0, d = 0; + int rc = 0; - isal_deflate_stateless(&sw_ch->stream); + accel_task->d.iovcnt = 1; + diov[0].iov_base = accel_task->dst; + diov[0].iov_len = accel_task->nbytes_dst; + + isal_deflate_reset(&sw_ch->stream); + sw_ch->stream.end_of_stream = 0; + sw_ch->stream.next_out = diov[d].iov_base; + sw_ch->stream.avail_out = diov[d].iov_len; + sw_ch->stream.next_in = siov[s].iov_base; + sw_ch->stream.avail_in = siov[s].iov_len; + + do { + /* if isal has exhausted the current dst iovec, move to the next + * one if there is one */ + if (sw_ch->stream.avail_out == 0) { + if (++d < accel_task->d.iovcnt) { + sw_ch->stream.next_out = diov[d].iov_base; + sw_ch->stream.avail_out = diov[d].iov_len; + assert(sw_ch->stream.avail_out > 0); + } else { + /* we have no avail_out but also no more iovecs left so this is + * the case where the output buffer was a perfect fit for the + * compressed data and we're done. 
*/ + break; + } + } + + /* if isal has exhausted the current src iovec, move to the next + * one if there is one */ + if (sw_ch->stream.avail_in == 0 && ((s + 1) < accel_task->s.iovcnt)) { + s++; + sw_ch->stream.next_in = siov[s].iov_base; + sw_ch->stream.avail_in = siov[s].iov_len; + assert(sw_ch->stream.avail_in > 0); + } + + if (remaining <= last_seglen) { + /* Need to set end of stream on last block */ + sw_ch->stream.end_of_stream = 1; + } + + rc = isal_deflate(&sw_ch->stream); + if (rc) { + SPDK_ERRLOG("isal_deflate retunred error %d.\n", rc); + } + + if (remaining > 0) { + assert(siov[s].iov_len > sw_ch->stream.avail_in); + remaining -= (siov[s].iov_len - sw_ch->stream.avail_in); + } + + } while (remaining > 0 || sw_ch->stream.avail_out == 0); + assert(sw_ch->stream.avail_in == 0); + + /* Get our total output size */ if (accel_task->output_size != NULL) { - assert(accel_task->nbytes_dst > sw_ch->stream.avail_out); - *accel_task->output_size = accel_task->nbytes_dst - sw_ch->stream.avail_out; + assert(sw_ch->stream.total_out > 0); + *accel_task->output_size = sw_ch->stream.total_out; } - return 0; + return rc; #else SPDK_ERRLOG("ISAL option is required to use software compression.\n"); return -EINVAL; @@ -197,17 +253,44 @@ static int _sw_accel_decompress(struct sw_accel_io_channel *sw_ch, struct spdk_accel_task *accel_task) { #ifdef SPDK_CONFIG_ISAL - int rc; + struct iovec *siov = accel_task->s.iovs; + struct iovec *diov = accel_task->d.iovs; + uint32_t s = 0, d = 0; + int rc = 0; - sw_ch->state.next_in = accel_task->src; - sw_ch->state.avail_in = accel_task->nbytes; - sw_ch->state.next_out = accel_task->dst; - sw_ch->state.avail_out = accel_task->nbytes_dst; + isal_inflate_reset(&sw_ch->state); + sw_ch->state.next_out = diov[d].iov_base; + sw_ch->state.avail_out = diov[d].iov_len; + sw_ch->state.next_in = siov[s].iov_base; + sw_ch->state.avail_in = siov[s].iov_len; + + do { + /* if isal has exhausted the current dst iovec, move to the next + * one if there 
is one */ + if (sw_ch->state.avail_out == 0 && ((d + 1) < accel_task->d.iovcnt)) { + d++; + sw_ch->state.next_out = diov[d].iov_base; + sw_ch->state.avail_out = diov[d].iov_len; + assert(sw_ch->state.avail_out > 0); + } + + /* if isal has exhausted the current src iovec, move to the next + * one if there is one */ + if (sw_ch->state.avail_in == 0 && ((s + 1) < accel_task->s.iovcnt)) { + s++; + sw_ch->state.next_in = siov[s].iov_base; + sw_ch->state.avail_in = siov[s].iov_len; + assert(sw_ch->state.avail_in > 0); + } + + rc = isal_inflate(&sw_ch->state); + if (rc) { + SPDK_ERRLOG("isal_inflate retunred error %d.\n", rc); + } + + } while (sw_ch->state.block_state < ISAL_BLOCK_FINISH); + assert(sw_ch->state.avail_in == 0); - rc = isal_inflate_stateless(&sw_ch->state); - if (rc) { - SPDK_ERRLOG("isal_inflate_stateless retunred error %d.\n", rc); - } return rc; #else SPDK_ERRLOG("ISAL option is required to use software decompression.\n"); @@ -333,7 +416,8 @@ sw_accel_create_cb(void *io_device, void *ctx_buf) sw_ch->completion_poller = SPDK_POLLER_REGISTER(accel_comp_poll, sw_ch, 0); #ifdef SPDK_CONFIG_ISAL - isal_deflate_stateless_init(&sw_ch->stream); + isal_deflate_init(&sw_ch->stream); + sw_ch->stream.flush = NO_FLUSH; sw_ch->stream.level = 1; sw_ch->stream.level_buf = calloc(1, ISAL_DEF_LVL1_DEFAULT); if (sw_ch->stream.level_buf == NULL) { diff --git a/lib/idxd/Makefile b/lib/idxd/Makefile index d86a9f97e..6f57c81e8 100644 --- a/lib/idxd/Makefile +++ b/lib/idxd/Makefile @@ -6,7 +6,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 8 +SO_VER := 9 SO_MINOR := 0 C_SRCS = idxd.c idxd_user.c diff --git a/lib/idxd/idxd.c b/lib/idxd/idxd.c index 2e3f2396a..ce82b4f47 100644 --- a/lib/idxd/idxd.c +++ b/lib/idxd/idxd.c @@ -1170,22 +1170,22 @@ error: int spdk_idxd_submit_compress(struct spdk_idxd_io_channel *chan, - struct iovec *diov, uint32_t diovcnt, + void *dst, uint64_t nbytes, struct iovec *siov, uint32_t siovcnt, uint32_t *output_size, int flags, spdk_idxd_req_cb cb_fn, void *cb_arg) { assert(chan != NULL); - assert(diov != NULL); + assert(dst != NULL); assert(siov != NULL); - if (diovcnt == 1 && siovcnt == 1) { + if (siovcnt == 1) { /* Simple case - copying one buffer to another */ - if (diov[0].iov_len < siov[0].iov_len) { + if (nbytes < siov[0].iov_len) { return -EINVAL; } - return _idxd_submit_compress_single(chan, diov[0].iov_base, siov[0].iov_base, - diov[0].iov_len, siov[0].iov_len, + return _idxd_submit_compress_single(chan, dst, siov[0].iov_base, + nbytes, siov[0].iov_len, output_size, flags, cb_fn, cb_arg); } /* TODO: vectored support */ diff --git a/module/accel/iaa/accel_iaa.c b/module/accel/iaa/accel_iaa.c index 9365b8468..ad26bb1d7 100644 --- a/module/accel/iaa/accel_iaa.c +++ b/module/accel/iaa/accel_iaa.c @@ -124,28 +124,25 @@ _process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task) struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); struct idxd_task *idxd_task; int rc = 0; - struct iovec siov = {}; - struct iovec diov = {}; int flags = 0; idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task); idxd_task->chan = chan; + /* TODO: iovec support */ + if (task->d.iovcnt > 1 || task->s.iovcnt > 1) { + SPDK_ERRLOG("fatal: IAA does not support > 1 iovec\n"); + assert(0); + } + switch (task->op_code) { case ACCEL_OPC_COMPRESS: - siov.iov_base = task->src; - siov.iov_len = task->nbytes; - diov.iov_base = task->dst; - diov.iov_len = task->nbytes_dst; - rc = spdk_idxd_submit_compress(chan->chan, &diov, 
1, &siov, 1, task->output_size, - flags, iaa_done, idxd_task); + rc = spdk_idxd_submit_compress(chan->chan, task->dst, task->nbytes_dst, task->s.iovs, + task->s.iovcnt, task->output_size, flags, iaa_done, idxd_task); break; case ACCEL_OPC_DECOMPRESS: - siov.iov_base = task->src; - siov.iov_len = task->nbytes; - diov.iov_base = task->dst; - diov.iov_len = task->nbytes_dst; - rc = spdk_idxd_submit_decompress(chan->chan, &diov, 1, &siov, 1, flags, iaa_done, idxd_task); + rc = spdk_idxd_submit_decompress(chan->chan, task->d.iovs, task->d.iovcnt, task->s.iovs, + task->s.iovcnt, flags, iaa_done, idxd_task); break; default: assert(false);