nvme/perf: generate multiple-SGL payloads in perf tool

A new option, -O, is added to specify the IO unit size. The payload
buffer is split into multiple IOVs according to the IO unit size,
which makes it possible to test requests with multiple SGL descriptors
over NVMe-oF RDMA.
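
For example, assuming a 128 KiB IO size against an NVMe-oF RDMA target
(the address and port below are placeholders):

    perf -q 32 -o 131072 -O 4096 -w read -t 60 \
         -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'

Each payload is then split into SPDK_CEIL_DIV(131072, 4096) = 32
iovecs, so every request is carried by 32 SGEs.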

Signed-off-by: Allen Zhu <allenz@mellanox.com>
Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Signed-off-by: Evgeniy Kochetov <evgeniik@mellanox.com>
Change-Id: I7624966b585bf0a9d2bbbb6263fa06fbcdb65820
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4377
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: John Kariuki <John.K.Kariuki@intel.com>
Reviewed-by: sunshihao <sunshihao@huawei.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Allen Zhu 2020-07-29 11:57:10 +03:00 committed by Tomasz Zawadzki
parent aa4b4e17f3
commit f8330a575e


@@ -178,7 +178,10 @@ struct ns_worker_ctx {
struct perf_task {
struct ns_worker_ctx *ns_ctx;
struct iovec iov;
struct iovec *iovs; /* array of iovecs to transfer. */
int iovcnt; /* Number of iovecs in iovs array. */
int iovpos; /* Current iovec position. */
uint32_t iov_offset; /* Offset in current iovec. */
struct iovec md_iov;
uint64_t submit_tsc;
bool is_read;
@@ -209,6 +212,8 @@ struct ns_fn_table {
void (*cleanup_ns_worker_ctx)(struct ns_worker_ctx *ns_ctx);
};
static uint32_t g_io_unit_size = (UINT32_MAX & (~0x03));
static int g_outstanding_commands;
static bool g_latency_ssd_tracking_enable;
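
Note: the default g_io_unit_size above is UINT32_MAX rounded down to a
4-byte boundary (0xFFFFFFFC). Any practical IO size is smaller than
that, so unless -O is given each payload still lands in a single iovec
and the pre-existing single-SGE submission paths are taken.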
@@ -309,18 +314,90 @@ perf_set_sock_zcopy(const char *impl_name, bool enable)
}
}
static void
nvme_perf_reset_sgl(void *ref, uint32_t sgl_offset)
{
struct iovec *iov;
struct perf_task *task = (struct perf_task *)ref;
task->iov_offset = sgl_offset;
for (task->iovpos = 0; task->iovpos < task->iovcnt; task->iovpos++) {
iov = &task->iovs[task->iovpos];
if (task->iov_offset < iov->iov_len) {
break;
}
task->iov_offset -= iov->iov_len;
}
}
static int
nvme_perf_next_sge(void *ref, void **address, uint32_t *length)
{
struct iovec *iov;
struct perf_task *task = (struct perf_task *)ref;
assert(task->iovpos < task->iovcnt);
iov = &task->iovs[task->iovpos];
assert(task->iov_offset <= iov->iov_len);
*address = iov->iov_base + task->iov_offset;
*length = iov->iov_len - task->iov_offset;
task->iovpos++;
task->iov_offset = 0;
return 0;
}
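
A short walk-through of the two callbacks above: suppose the task has
four 4096-byte iovecs and the driver calls nvme_perf_reset_sgl() with
sgl_offset = 6144. The loop skips iovs[0] (6144 >= 4096), leaving
iovpos = 1 and iov_offset = 2048. The next nvme_perf_next_sge() call
then returns iovs[1].iov_base + 2048 with length 2048 and resets
iov_offset to 0, so subsequent calls hand out whole iovecs.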
static int
nvme_perf_allocate_iovs(struct perf_task *task, void *buf, uint32_t length)
{
int iovpos = 0;
struct iovec *iov;
uint32_t offset = 0;
task->iovcnt = SPDK_CEIL_DIV(length, (uint64_t)g_io_unit_size);
task->iovs = calloc(task->iovcnt, sizeof(struct iovec));
if (!task->iovs) {
return -1;
}
while (length > 0) {
iov = &task->iovs[iovpos];
iov->iov_len = spdk_min(length, g_io_unit_size);
iov->iov_base = buf + offset;
length -= iov->iov_len;
offset += iov->iov_len;
iovpos++;
}
return 0;
}
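
The split arithmetic is easy to sanity-check in isolation. Here is a
minimal standalone sketch (not part of the patch; CEIL_DIV and MIN are
local stand-ins for SPDK_CEIL_DIV and spdk_min):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define CEIL_DIV(x, y) (((x) + (y) - 1) / (y)) /* stand-in for SPDK_CEIL_DIV */
#define MIN(a, b) ((a) < (b) ? (a) : (b))      /* stand-in for spdk_min */

int main(void)
{
	/* A 10000-byte payload with a 4096-byte IO unit yields 3 iovecs:
	 * 4096 + 4096 + 1808 bytes. */
	uint32_t length = 10000, unit = 4096, offset = 0;
	uint32_t iovcnt = CEIL_DIV(length, unit);

	printf("iovcnt = %" PRIu32 "\n", iovcnt);
	while (length > 0) {
		uint32_t len = MIN(length, unit);
		printf("iov: offset=%" PRIu32 " len=%" PRIu32 "\n", offset, len);
		length -= len;
		offset += len;
	}
	return 0;
}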
#ifdef SPDK_CONFIG_URING
static void
uring_setup_payload(struct perf_task *task, uint8_t pattern)
{
task->iov.iov_base = spdk_dma_zmalloc(g_io_size_bytes, g_io_align, NULL);
task->iov.iov_len = g_io_size_bytes;
if (task->iov.iov_base == NULL) {
fprintf(stderr, "spdk_dma_zmalloc() for task->iov.iov_base failed\n");
struct iovec *iov;
task->iovs = calloc(1, sizeof(struct iovec));
if (!task->iovs) {
fprintf(stderr, "perf task failed to allocate iovs\n");
exit(1);
}
memset(task->iov.iov_base, pattern, task->iov.iov_len);
task->iovcnt = 1;
iov = &task->iovs[0];
iov->iov_base = spdk_dma_zmalloc(g_io_size_bytes, g_io_align, NULL);
iov->iov_len = g_io_size_bytes;
if (iov->iov_base == NULL) {
fprintf(stderr, "spdk_dma_zmalloc() for task->iovs[0].iov_base failed\n");
free(task->iovs);
exit(1);
}
memset(iov->iov_base, pattern, iov->iov_len);
}
static int
@@ -336,9 +413,9 @@ uring_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
}
if (task->is_read) {
io_uring_prep_readv(sqe, entry->u.uring.fd, &task->iov, 1, offset_in_ios * task->iov.iov_len);
io_uring_prep_readv(sqe, entry->u.uring.fd, task->iovs, 1, offset_in_ios * task->iovs[0].iov_len);
} else {
io_uring_prep_writev(sqe, entry->u.uring.fd, &task->iov, 1, offset_in_ios * task->iov.iov_len);
io_uring_prep_writev(sqe, entry->u.uring.fd, task->iovs, 1, offset_in_ios * task->iovs[0].iov_len);
}
io_uring_sqe_set_data(sqe, task);
@@ -373,7 +450,7 @@ uring_check_io(struct ns_worker_ctx *ns_ctx)
for (i = 0; i < count; i++) {
assert(ns_ctx->u.uring.cqes[i] != NULL);
task = (struct perf_task *)ns_ctx->u.uring.cqes[i]->user_data;
if (ns_ctx->u.uring.cqes[i]->res != (int)task->iov.iov_len) {
if (ns_ctx->u.uring.cqes[i]->res != (int)task->iovs[0].iov_len) {
fprintf(stderr, "cqe[i]->status=%d\n", ns_ctx->u.uring.cqes[i]->res);
exit(0);
}
@@ -427,13 +504,24 @@ static const struct ns_fn_table uring_fn_table = {
static void
aio_setup_payload(struct perf_task *task, uint8_t pattern)
{
task->iov.iov_base = spdk_dma_zmalloc(g_io_size_bytes, g_io_align, NULL);
task->iov.iov_len = g_io_size_bytes;
if (task->iov.iov_base == NULL) {
fprintf(stderr, "spdk_dma_zmalloc() for task->buf failed\n");
struct iovec *iov;
task->iovs = calloc(1, sizeof(struct iovec));
if (!task->iovs) {
fprintf(stderr, "perf task failed to allocate iovs\n");
exit(1);
}
memset(task->iov.iov_base, pattern, task->iov.iov_len);
task->iovcnt = 1;
iov = &task->iovs[0];
iov->iov_base = spdk_dma_zmalloc(g_io_size_bytes, g_io_align, NULL);
iov->iov_len = g_io_size_bytes;
if (iov->iov_base == NULL) {
fprintf(stderr, "spdk_dma_zmalloc() for task->iovs[0].iov_base failed\n");
free(task->iovs);
exit(1);
}
memset(iov->iov_base, pattern, iov->iov_len);
}
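
Note that both the uring and aio setup paths above still allocate
exactly one iovec (iovcnt = 1); only the bookkeeping moved from
task->iov to task->iovs. The -O splitting takes effect only in the
SPDK NVMe driver path, which is why the usage text below says
"for SPDK driver".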
static int
@@ -462,10 +550,10 @@ aio_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
{
if (task->is_read) {
return aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PREAD,
&task->iov, offset_in_ios, task);
task->iovs, offset_in_ios, task);
} else {
return aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PWRITE,
&task->iov, offset_in_ios, task);
task->iovs, offset_in_ios, task);
}
}
@@ -637,18 +725,26 @@ static void
nvme_setup_payload(struct perf_task *task, uint8_t pattern)
{
uint32_t max_io_size_bytes, max_io_md_size;
void *buf;
int rc;
/* Maximum extended LBA format size across all active namespaces;
 * equal to g_io_size_bytes for namespaces without metadata.
 */
max_io_size_bytes = g_io_size_bytes + g_max_io_md_size * g_max_io_size_blocks;
task->iov.iov_base = spdk_dma_zmalloc(max_io_size_bytes, g_io_align, NULL);
task->iov.iov_len = max_io_size_bytes;
if (task->iov.iov_base == NULL) {
buf = spdk_dma_zmalloc(max_io_size_bytes, g_io_align, NULL);
if (buf == NULL) {
fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n");
exit(1);
}
memset(task->iov.iov_base, pattern, task->iov.iov_len);
memset(buf, pattern, max_io_size_bytes);
rc = nvme_perf_allocate_iovs(task, buf, max_io_size_bytes);
if (rc < 0) {
fprintf(stderr, "perf task failed to allocate iovs\n");
spdk_dma_free(buf);
exit(1);
}
max_io_md_size = g_max_io_md_size * g_max_io_size_blocks;
if (max_io_md_size != 0) {
@@ -656,7 +752,8 @@ nvme_setup_payload(struct perf_task *task, uint8_t pattern)
task->md_iov.iov_len = max_io_md_size;
if (task->md_iov.iov_base == NULL) {
fprintf(stderr, "task->md_buf spdk_dma_zmalloc failed\n");
spdk_dma_free(task->iov.iov_base);
spdk_dma_free(task->iovs[0].iov_base);
free(task->iovs);
exit(1);
}
}
@@ -704,23 +801,32 @@ nvme_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
}
if (task->is_read) {
return spdk_nvme_ns_cmd_read_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num],
task->iov.iov_base, task->md_iov.iov_base,
lba,
entry->io_size_blocks, io_complete,
task, entry->io_flags,
task->dif_ctx.apptag_mask, task->dif_ctx.app_tag);
if (task->iovcnt == 1) {
return spdk_nvme_ns_cmd_read_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num],
task->iovs[0].iov_base, task->md_iov.iov_base,
lba,
entry->io_size_blocks, io_complete,
task, entry->io_flags,
task->dif_ctx.apptag_mask, task->dif_ctx.app_tag);
} else {
return spdk_nvme_ns_cmd_readv_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num],
lba, entry->io_size_blocks,
io_complete, task, entry->io_flags,
nvme_perf_reset_sgl, nvme_perf_next_sge,
task->md_iov.iov_base,
task->dif_ctx.apptag_mask, task->dif_ctx.app_tag);
}
} else {
switch (mode) {
case DIF_MODE_DIF:
rc = spdk_dif_generate(&task->iov, 1, entry->io_size_blocks, &task->dif_ctx);
rc = spdk_dif_generate(task->iovs, task->iovcnt, entry->io_size_blocks, &task->dif_ctx);
if (rc != 0) {
fprintf(stderr, "Generation of DIF failed\n");
return rc;
}
break;
case DIF_MODE_DIX:
rc = spdk_dix_generate(&task->iov, 1, &task->md_iov, entry->io_size_blocks,
rc = spdk_dix_generate(task->iovs, task->iovcnt, &task->md_iov, entry->io_size_blocks,
&task->dif_ctx);
if (rc != 0) {
fprintf(stderr, "Generation of DIX failed\n");
@@ -731,12 +837,21 @@ nvme_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
break;
}
return spdk_nvme_ns_cmd_write_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num],
task->iov.iov_base, task->md_iov.iov_base,
lba,
entry->io_size_blocks, io_complete,
task, entry->io_flags,
task->dif_ctx.apptag_mask, task->dif_ctx.app_tag);
if (task->iovcnt == 1) {
return spdk_nvme_ns_cmd_write_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num],
task->iovs[0].iov_base, task->md_iov.iov_base,
lba,
entry->io_size_blocks, io_complete,
task, entry->io_flags,
task->dif_ctx.apptag_mask, task->dif_ctx.app_tag);
} else {
return spdk_nvme_ns_cmd_writev_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num],
lba, entry->io_size_blocks,
io_complete, task, entry->io_flags,
nvme_perf_reset_sgl, nvme_perf_next_sge,
task->md_iov.iov_base,
task->dif_ctx.apptag_mask, task->dif_ctx.app_tag);
}
}
}
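
In the multi-iovec branches, spdk_nvme_ns_cmd_readv_with_md() and
spdk_nvme_ns_cmd_writev_with_md() receive the payload indirectly
through the nvme_perf_reset_sgl/nvme_perf_next_sge callbacks: the
driver calls reset_sgl to seek to a payload offset and then next_sge
repeatedly to collect the scattered buffers, which allows the NVMe-oF
RDMA transport to build requests with multiple SGL descriptors, as
described in the commit message.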
@@ -769,14 +884,14 @@ nvme_verify_io(struct perf_task *task, struct ns_entry *entry)
}
if (entry->md_interleave) {
rc = spdk_dif_verify(&task->iov, 1, entry->io_size_blocks, &task->dif_ctx,
rc = spdk_dif_verify(task->iovs, task->iovcnt, entry->io_size_blocks, &task->dif_ctx,
&err_blk);
if (rc != 0) {
fprintf(stderr, "DIF error detected. type=%d, offset=%" PRIu32 "\n",
err_blk.err_type, err_blk.err_offset);
}
} else {
rc = spdk_dix_verify(&task->iov, 1, &task->md_iov, entry->io_size_blocks,
rc = spdk_dix_verify(task->iovs, task->iovcnt, &task->md_iov, entry->io_size_blocks,
&task->dif_ctx, &err_blk);
if (rc != 0) {
fprintf(stderr, "DIX error detected. type=%d, offset=%" PRIu32 "\n",
@@ -1183,7 +1298,8 @@ task_complete(struct perf_task *task)
* the one just completed.
*/
if (spdk_unlikely(ns_ctx->is_draining)) {
spdk_dma_free(task->iov.iov_base);
spdk_dma_free(task->iovs[0].iov_base);
free(task->iovs);
spdk_dma_free(task->md_iov.iov_base);
free(task);
} else {
@@ -1388,6 +1504,7 @@ static void usage(char *program_name)
printf("\t Example: -b 0000:d8:00.0 -b 0000:d9:00.0\n");
printf("\t[-q io depth]\n");
printf("\t[-o io size in bytes]\n");
printf("\t[-O io unit size in bytes (4-byte aligned) for SPDK driver. default: same as io size]\n");
printf("\t[-P number of io queues per namespace. default: 1]\n");
printf("\t[-U number of unused io queues per controller. default: 0]\n");
printf("\t[-w io pattern type, must be one of\n");
@@ -1866,7 +1983,7 @@ parse_args(int argc, char **argv)
long int val;
int rc;
while ((op = getopt(argc, argv, "a:b:c:e:gi:lo:q:r:k:s:t:w:z:A:C:DGHILM:NP:RS:T:U:VZ:")) != -1) {
while ((op = getopt(argc, argv, "a:b:c:e:gi:lo:q:r:k:s:t:w:z:A:C:DGHILM:NO:P:RS:T:U:VZ:")) != -1) {
switch (op) {
case 'a':
case 'A':
@@ -1874,6 +1991,7 @@ parse_args(int argc, char **argv)
case 'C':
case 'P':
case 'o':
case 'O':
case 'q':
case 'k':
case 's':
@@ -1901,6 +2019,9 @@ parse_args(int argc, char **argv)
case 'o':
g_io_size_bytes = val;
break;
case 'O':
g_io_unit_size = val;
break;
case 'q':
g_queue_depth = val;
break;
@@ -2045,6 +2166,10 @@ parse_args(int argc, char **argv)
usage(argv[0]);
return 1;
}
if (!g_io_unit_size || g_io_unit_size % 4) {
fprintf(stderr, "io unit size can not be 0 or non 4-byte aligned\n");
return 1;
}
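
For example, -O 4096 passes this check, while -O 0 or -O 6
(6 % 4 == 2) is rejected during argument parsing.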
if (!g_workload_type) {
fprintf(stderr, "missing -w (io pattern type) operand\n");
usage(argv[0]);