diff --git a/examples/accel/perf/accel_perf.c b/examples/accel/perf/accel_perf.c index fd59a488b..e56778964 100644 --- a/examples/accel/perf/accel_perf.c +++ b/examples/accel/perf/accel_perf.c @@ -80,7 +80,10 @@ struct ap_task { uint32_t iov_cnt; void *dst; void *dst2; - uint32_t crc_dst; + union { + uint32_t crc_dst; + uint32_t output_size; + }; struct worker_thread *worker; int expected_status; /* used for the compare operation */ TAILQ_ENTRY(ap_task) link; @@ -101,6 +104,8 @@ struct worker_thread { struct spdk_poller *stop_poller; void *task_base; struct display_info display; + enum accel_opcode workload; + void *rnd_data; }; static void @@ -142,7 +147,7 @@ usage(void) printf("\t[-n number of channels]\n"); printf("\t[-o transfer size in bytes]\n"); printf("\t[-t time in seconds]\n"); - printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast\n"); + printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, compress, dualcast\n"); printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); @@ -222,6 +227,8 @@ parse_args(int argc, char *argv) g_workload_selection = ACCEL_OPC_COMPARE; } else if (!strcmp(g_workload_type, "dualcast")) { g_workload_selection = ACCEL_OPC_DUALCAST; + } else if (!strcmp(g_workload_type, "compress")) { + g_workload_selection = ACCEL_OPC_COMPRESS; } break; default: @@ -239,6 +246,7 @@ unregister_worker(void *arg1) struct worker_thread *worker = arg1; free(worker->task_base); + free(worker->rnd_data); spdk_put_io_channel(worker->ch); pthread_mutex_lock(&g_workers_lock); assert(g_num_workers >= 1); @@ -296,6 +304,8 @@ _get_task_data_bufs(struct ap_task *task) /* For fill, set the entire src buffer so we can check if verify is enabled. 
*/ if (g_workload_selection == ACCEL_OPC_FILL) { memset(task->src, g_fill_pattern, g_xfer_size_bytes); + } else if (g_workload_selection == ACCEL_OPC_COMPRESS) { + memcpy(task->src, task->worker->rnd_data, g_xfer_size_bytes); } else { memset(task->src, DATA_PATTERN, g_xfer_size_bytes); } @@ -316,13 +326,24 @@ _get_task_data_bufs(struct ap_task *task) } } - if (g_workload_selection == ACCEL_OPC_DUALCAST) { + /* For dualcast 2 buffers are needed for the operation. For compress we use the second buffer to + * store the original pre-compressed data so we have a copy of it when we go to decompress. + */ + if (g_workload_selection == ACCEL_OPC_DUALCAST || g_workload_selection == ACCEL_OPC_COMPRESS) { task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); if (task->dst2 == NULL) { fprintf(stderr, "Unable to alloc dst buffer\n"); return -ENOMEM; } - memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); + if (g_workload_selection == ACCEL_OPC_DUALCAST) { + memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); + } else if (g_workload_selection == ACCEL_OPC_COMPRESS) { + /* copy the original data to dst2 so we can compare it to + * the results of decompression if -y is used. 
+ */ + assert(task->src); /* for scan-build */ + memcpy(task->dst2, task->src, g_xfer_size_bytes); + } } return 0; @@ -354,7 +375,7 @@ _submit_single(struct worker_thread *worker, struct ap_task *task) assert(worker); - switch (g_workload_selection) { + switch (worker->workload) { case ACCEL_OPC_COPY: rc = spdk_accel_submit_copy(worker->ch, task->dst, task->src, g_xfer_size_bytes, flags, accel_done, task); @@ -389,6 +410,11 @@ _submit_single(struct worker_thread *worker, struct ap_task *task) rc = spdk_accel_submit_dualcast(worker->ch, task->dst, task->dst2, task->src, g_xfer_size_bytes, flags, accel_done, task); break; + case ACCEL_OPC_COMPRESS: + rc = spdk_accel_submit_compress(worker->ch, task->dst, task->src, + g_xfer_size_bytes, g_xfer_size_bytes, &task->output_size, + flags, accel_done, task); + break; default: assert(false); break; @@ -419,7 +445,7 @@ _free_task_buffers(struct ap_task *task) } spdk_dma_free(task->dst); - if (g_workload_selection == ACCEL_OPC_DUALCAST) { + if (g_workload_selection == ACCEL_OPC_DUALCAST || g_workload_selection == ACCEL_OPC_COMPRESS) { spdk_dma_free(task->dst2); } } @@ -446,18 +472,26 @@ _vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt) return 0; } +static int _worker_stop(void *arg); + static void accel_done(void *arg1, int status) { struct ap_task *task = arg1; struct worker_thread *worker = task->worker; uint32_t sw_crc32c; + int rc; assert(worker); assert(worker->current_queue_depth > 0); + if (!worker->is_draining && status == -EINVAL && worker->workload == ACCEL_OPC_COMPRESS) { + printf("Invalid configuration, compress workload needs ISA-L or IAA. 
Exiting\n"); + _worker_stop(worker); + } + if (g_verify && status == 0) { - switch (g_workload_selection) { + switch (worker->workload) { case ACCEL_OPC_COPY_CRC32C: sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); if (task->crc_dst != sw_crc32c) { @@ -500,6 +534,31 @@ accel_done(void *arg1, int status) break; case ACCEL_OPC_COMPARE: break; + case ACCEL_OPC_COMPRESS: + /* We've completed the compression phase, now need to uncompress the compressed data + * and compare that to the original buffer to see if it matches. So we flip-flop + * src and destination then compare task->src to task->dst2 which is where we saved + * the original data. + */ + if (!worker->is_draining) { + worker->workload = ACCEL_OPC_DECOMPRESS; + worker->xfer_completed++; + memset(task->src, 0, g_xfer_size_bytes); + rc = spdk_accel_submit_decompress(worker->ch, task->src, task->dst, + g_xfer_size_bytes, g_xfer_size_bytes, 0, accel_done, task); + if (rc) { + SPDK_NOTICELOG("Unable to submit decomrpess for verficiation, tc = %d\n", rc); + } + return; + } + break; + case ACCEL_OPC_DECOMPRESS: + worker->workload = ACCEL_OPC_COMPRESS; + if (memcmp(task->dst2, task->src, g_xfer_size_bytes)) { + SPDK_NOTICELOG("Data miscompare after decompression\n"); + worker->xfer_failed++; + } + break; default: assert(false); break; @@ -509,6 +568,7 @@ accel_done(void *arg1, int status) if (task->expected_status == -EILSEQ) { assert(status != 0); worker->injected_miscompares++; + status = 0; } else if (status) { /* Expected to pass but the accel engine reported an error (ex: COMPARE operation). 
*/ worker->xfer_failed++; @@ -517,7 +577,7 @@ accel_done(void *arg1, int status) worker->xfer_completed++; worker->current_queue_depth--; - if (!worker->is_draining) { + if (!worker->is_draining && status == 0) { TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); task = _get_task(worker); _submit_single(worker, task); @@ -619,6 +679,8 @@ _init_thread(void *arg1) struct ap_task *task; int i, num_tasks = g_allocate_depth; struct display_info *display = arg1; + uint8_t *offset; + uint64_t j; worker = calloc(1, sizeof(*worker)); if (worker == NULL) { @@ -627,6 +689,7 @@ _init_thread(void *arg1) return; } + worker->workload = g_workload_selection; worker->display.core = display->core; worker->display.thread = display->thread; free(display); @@ -647,6 +710,22 @@ _init_thread(void *arg1) goto error; } + if (g_workload_selection == ACCEL_OPC_COMPRESS) { + worker->rnd_data = calloc(1, g_xfer_size_bytes); + if (worker->rnd_data == NULL) { + printf("unable to allcoate rnd_data buffer\n"); + goto error; + } + /* only fill half the data buffer with rnd data to make it more + * compressible. + */ + offset = worker->rnd_data; + for (j = 0; j < g_xfer_size_bytes / sizeof(uint8_t) / 2; j++) { + *offset = rand() % 256; + offset++; + } + } + task = worker->task_base; for (i = 0; i < num_tasks; i++) { TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); @@ -675,6 +754,7 @@ _init_thread(void *arg1) return; error: + free(worker->rnd_data); _free_task_buffers_in_pool(worker); free(worker->task_base); free(worker); @@ -737,7 +817,8 @@ main(int argc, char **argv) (g_workload_selection != ACCEL_OPC_CRC32C) && (g_workload_selection != ACCEL_OPC_COPY_CRC32C) && (g_workload_selection != ACCEL_OPC_COMPARE) && - (g_workload_selection != ACCEL_OPC_DUALCAST)) { + (g_workload_selection != ACCEL_OPC_DUALCAST) && + (g_workload_selection != ACCEL_OPC_COMPRESS)) { usage(); g_rc = -1; goto cleanup;