bdevperf: support latency info in bdevperf output

Get latency info from bdev histogram and print latency info in
bdevperf output.
print min/max/average latency info by default;
print summarized histogram info if run bdevperf with '-l';
print detailed histogram info if run bdevperf with '-ll'.

Change-Id: I3261574a12c038acde81a58727e9942c0f8c57b2
Signed-off-by: Richael Zhuang <richael.zhuang@arm.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14640
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Richael Zhuang 2022-09-13 18:07:09 +08:00 committed by Tomasz Zawadzki
parent a1fb441c84
commit 723dd06eb8

View File

@ -19,6 +19,7 @@
#include "spdk/bit_array.h"
#include "spdk/conf.h"
#include "spdk/zipf.h"
#include "spdk/histogram_data.h"
#define BDEVPERF_CONFIG_MAX_FILENAME 1024
#define BDEVPERF_CONFIG_UNDEFINED -1
@ -75,6 +76,34 @@ static struct spdk_poller *g_perf_timer = NULL;
static void bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task);
static void rpc_perform_tests_cb(void);
static uint32_t g_bdev_count = 0;
static uint32_t g_latency_display_level;
static const double g_latency_cutoffs[] = {
0.01,
0.10,
0.25,
0.50,
0.75,
0.90,
0.95,
0.98,
0.99,
0.995,
0.999,
0.9999,
0.99999,
0.999999,
0.9999999,
-1,
};
struct latency_info {
uint64_t min;
uint64_t max;
uint64_t total;
};
struct bdevperf_job {
char *name;
struct spdk_bdev *bdev;
@ -116,6 +145,9 @@ struct bdevperf_job {
struct spdk_zipf *zipf;
TAILQ_HEAD(, bdevperf_task) task_list;
uint64_t run_time_in_usec;
/* keep channel's histogram data before being destroyed */
struct spdk_histogram_data *histogram;
};
struct spdk_bdevperf {
@ -169,9 +201,13 @@ struct bdevperf_aggregate_stats {
double total_mb_per_second;
double total_failed_per_second;
double total_timeout_per_second;
double min_latency;
double max_latency;
uint64_t total_io_completed;
uint64_t total_tsc;
};
static struct bdevperf_aggregate_stats g_stats = {};
static struct bdevperf_aggregate_stats g_stats = {.min_latency = (double)UINT64_MAX};
/*
* Cumulative Moving Average (CMA): average of all data up to current
@ -201,11 +237,36 @@ get_ema_io_per_second(struct bdevperf_job *job, uint64_t ema_period)
return job->ema_io_per_second;
}
static void
get_avg_latency(void *ctx, uint64_t start, uint64_t end, uint64_t count,
uint64_t total, uint64_t so_far)
{
struct latency_info *latency_info = ctx;
if (count == 0) {
return;
}
latency_info->total += (start + end) / 2 * count;
if (so_far == count) {
latency_info->min = start;
}
if (so_far == total) {
latency_info->max = end;
}
}
static void
performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job *job)
{
double io_per_second, mb_per_second, failed_per_second, timeout_per_second;
double average_latency = 0.0, min_latency, max_latency;
uint64_t time_in_usec;
uint64_t tsc_rate;
uint64_t total_io;
struct latency_info latency_info = {};
printf("\r Job: %s (Core Mask 0x%s)\n", spdk_thread_get_name(job->thread),
spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));
@ -235,20 +296,41 @@ performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job
} else {
io_per_second = get_ema_io_per_second(job, stats->ema_period);
}
tsc_rate = spdk_get_ticks_hz();
mb_per_second = io_per_second * job->io_size / (1024 * 1024);
spdk_histogram_data_iterate(job->histogram, get_avg_latency, &latency_info);
total_io = job->io_completed + job->io_failed;
if (total_io != 0) {
average_latency = (double)latency_info.total / total_io * 1000 * 1000 / tsc_rate;
}
min_latency = (double)latency_info.min * 1000 * 1000 / tsc_rate;
max_latency = (double)latency_info.max * 1000 * 1000 / tsc_rate;
failed_per_second = (double)job->io_failed * 1000000 / time_in_usec;
timeout_per_second = (double)job->io_timeout * 1000000 / time_in_usec;
printf("\t %-20s: %10.2f %10.2f %10.2f",
job->name, (float)time_in_usec / 1000000, io_per_second, mb_per_second);
printf(" %10.2f %8.2f\n",
printf(" %10.2f %8.2f",
failed_per_second, timeout_per_second);
printf(" %10.2f %10.2f %10.2f\n",
average_latency, min_latency, max_latency);
stats->total_io_per_second += io_per_second;
stats->total_mb_per_second += mb_per_second;
stats->total_failed_per_second += failed_per_second;
stats->total_timeout_per_second += timeout_per_second;
stats->total_io_completed += job->io_completed + job->io_failed;
stats->total_tsc += latency_info.total;
if (min_latency < stats->min_latency) {
stats->min_latency = min_latency;
}
if (max_latency > stats->max_latency) {
stats->max_latency = max_latency;
}
}
static void
@ -364,6 +446,7 @@ free_job_config(void)
static void
bdevperf_job_free(struct bdevperf_job *job)
{
spdk_histogram_data_free(job->histogram);
spdk_bit_array_free(&job->outstanding);
spdk_zipf_free(&job->zipf);
free(job->name);
@ -376,13 +459,53 @@ job_thread_exit(void *ctx)
spdk_thread_exit(spdk_get_thread());
}
static void
check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count,
uint64_t total, uint64_t so_far)
{
double so_far_pct;
double **cutoff = ctx;
uint64_t tsc_rate;
if (count == 0) {
return;
}
tsc_rate = spdk_get_ticks_hz();
so_far_pct = (double)so_far / total;
while (so_far_pct >= **cutoff && **cutoff > 0) {
printf("%9.5f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / tsc_rate);
(*cutoff)++;
}
}
static void
print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count,
uint64_t total, uint64_t so_far)
{
double so_far_pct;
uint64_t tsc_rate;
if (count == 0) {
return;
}
tsc_rate = spdk_get_ticks_hz();
so_far_pct = (double)so_far * 100 / total;
printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n",
(double)start * 1000 * 1000 / tsc_rate,
(double)end * 1000 * 1000 / tsc_rate,
so_far_pct, count);
}
static void
bdevperf_test_done(void *ctx)
{
struct bdevperf_job *job, *jtmp;
struct bdevperf_task *task, *ttmp;
int rc;
double average_latency = 0.0;
uint64_t time_in_usec;
int rc;
if (g_time_in_usec) {
g_stats.io_time_in_usec = g_time_in_usec;
@ -405,13 +528,63 @@ bdevperf_test_done(void *ctx)
(double)g_time_in_usec / 1000000);
}
printf("\n\r %-*s: %10s %10s %10s %10s %8s\n",
28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s");
printf("\n%*s\n", 107, "Latency(us)");
printf("\r %-*s: %10s %10s %10s %10s %8s %10s %10s %10s\n",
28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s", "Average", "min", "max");
TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
TAILQ_REMOVE(&g_bdevperf.jobs, job, link);
performance_dump_job(&g_stats, job);
}
printf("\r =================================================================================="
"=================================\n");
printf("\r %-28s: %10s %10.2f %10.2f",
"Total", "", g_stats.total_io_per_second, g_stats.total_mb_per_second);
printf(" %10.2f %8.2f",
g_stats.total_failed_per_second, g_stats.total_timeout_per_second);
if (g_stats.total_io_completed != 0) {
average_latency = ((double)g_stats.total_tsc / g_stats.total_io_completed) * 1000 * 1000 /
spdk_get_ticks_hz();
}
printf(" %10.2f %10.2f %10.2f\n", average_latency, g_stats.min_latency, g_stats.max_latency);
fflush(stdout);
if (g_latency_display_level == 0 || g_stats.total_io_completed == 0) {
goto clean;
}
printf("\n Latency summary\n");
TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
printf("\r =============================================\n");
printf("\r Job: %s (Core Mask 0x%s)\n", spdk_thread_get_name(job->thread),
spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));
const double *cutoff = g_latency_cutoffs;
spdk_histogram_data_iterate(job->histogram, check_cutoff, &cutoff);
printf("\n");
}
if (g_latency_display_level == 1) {
goto clean;
}
printf("\r Latency histogram\n");
TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
printf("\r =============================================\n");
printf("\r Job: %s (Core Mask 0x%s)\n", spdk_thread_get_name(job->thread),
spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));
spdk_histogram_data_iterate(job->histogram, print_bucket, NULL);
printf("\n");
}
clean:
TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
TAILQ_REMOVE(&g_bdevperf.jobs, job, link);
spdk_thread_send_msg(job->thread, job_thread_exit, NULL);
@ -425,13 +598,6 @@ bdevperf_test_done(void *ctx)
bdevperf_job_free(job);
}
printf("\r ==================================================================================\n");
printf("\r %-28s: %10s %10.2f %10.2f",
"Total", "", g_stats.total_io_per_second, g_stats.total_mb_per_second);
printf(" %10.2f %8.2f\n",
g_stats.total_failed_per_second, g_stats.total_timeout_per_second);
fflush(stdout);
rc = g_run_rc;
if (g_request && !g_shutdown) {
rpc_perform_tests_cb();
@ -453,6 +619,13 @@ bdevperf_job_end(void *ctx)
}
}
static void
bdevperf_channel_get_histogram_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram)
{
struct spdk_histogram_data *job_hist = cb_arg;
spdk_histogram_data_merge(job_hist, histogram);
}
static void
bdevperf_end_task(struct bdevperf_task *task)
{
@ -464,6 +637,11 @@ bdevperf_end_task(struct bdevperf_task *task)
if (job->current_queue_depth == 0) {
end_tsc = spdk_get_ticks() - g_start_tsc;
job->run_time_in_usec = end_tsc * 1000000 / spdk_get_ticks_hz();
/* keep histogram info before channel is destroyed */
spdk_bdev_channel_get_histogram(job->bdev, job->ch, bdevperf_channel_get_histogram_cb,
job->histogram);
spdk_put_io_channel(job->ch);
spdk_bdev_close(job->bdev_desc);
spdk_thread_send_msg(g_main_thread, bdevperf_job_end, NULL);
@ -1061,12 +1239,20 @@ static void
_performance_dump_done(void *ctx)
{
struct bdevperf_aggregate_stats *stats = ctx;
double average_latency;
printf("\r ==================================================================================\n");
printf("\r =================================================================================="
"=================================\n");
printf("\r %-28s: %10s %10.2f %10.2f",
"Total", "", stats->total_io_per_second, stats->total_mb_per_second);
printf(" %10.2f %8.2f\n",
printf(" %10.2f %8.2f",
stats->total_failed_per_second, stats->total_timeout_per_second);
average_latency = ((double)stats->total_tsc / stats->total_io_completed) * 1000 * 1000 /
spdk_get_ticks_hz();
printf(" %10.2f %10.2f %10.2f\n", average_latency, stats->min_latency, stats->max_latency);
printf("\n");
fflush(stdout);
g_performance_dump_active = false;
@ -1107,6 +1293,8 @@ performance_statistics_thread(void *arg)
return -1;
}
stats->min_latency = (double)UINT64_MAX;
g_show_performance_period_num++;
stats->io_time_in_usec = g_show_performance_period_num * g_show_performance_period_in_usec;
@ -1137,8 +1325,9 @@ bdevperf_test(void)
/* Start a timer to dump performance numbers */
g_start_tsc = spdk_get_ticks();
if (g_show_performance_real_time && !g_perf_timer) {
printf("\r %-*s: %10s %10s %10s %10s %8s\n",
28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s");
printf("%*s\n", 107, "Latency(us)");
printf("\r %-*s: %10s %10s %10s %10s %8s %10s %10s %10s\n",
28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s", "Average", "min", "max");
g_perf_timer = SPDK_POLLER_REGISTER(performance_statistics_thread, NULL,
g_show_performance_period_in_usec);
@ -1161,21 +1350,70 @@ bdevperf_bdev_removed(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, vo
}
}
static void
bdevperf_histogram_status_cb(void *cb_arg, int status)
{
if (status != 0) {
g_run_rc = status;
if (g_continue_on_failure == false) {
g_error_to_exit = true;
}
}
if (--g_bdev_count == 0) {
if (g_run_rc == 0) {
/* Ready to run the test */
bdevperf_test();
} else {
bdevperf_test_done(NULL);
}
}
}
static uint32_t g_construct_job_count = 0;
static void
_bdevperf_enable_histogram(bool enable)
{
struct spdk_bdev *bdev;
/* increment initial g_bdev_count so that it will never reach 0 in the middle of iteration */
g_bdev_count = 1;
if (g_job_bdev_name != NULL) {
bdev = spdk_bdev_get_by_name(g_job_bdev_name);
if (bdev) {
g_bdev_count++;
spdk_bdev_histogram_enable(bdev, bdevperf_histogram_status_cb, NULL, enable);
} else {
fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name);
}
} else {
bdev = spdk_bdev_first_leaf();
while (bdev != NULL) {
g_bdev_count++;
spdk_bdev_histogram_enable(bdev, bdevperf_histogram_status_cb, NULL, enable);
bdev = spdk_bdev_next_leaf(bdev);
}
}
bdevperf_histogram_status_cb(NULL, 0);
}
static void
_bdevperf_construct_job_done(void *ctx)
{
if (--g_construct_job_count == 0) {
if (g_run_rc != 0) {
/* Something failed. */
bdevperf_test_done(NULL);
return;
}
/* Ready to run the test */
bdevperf_test();
/* always enable histogram. */
_bdevperf_enable_histogram(true);
} else if (g_run_rc != 0) {
/* Reset error as some jobs constructed right */
g_run_rc = 0;
@ -1397,6 +1635,13 @@ bdevperf_construct_job(struct spdk_bdev *bdev, struct job_config *config,
}
}
job->histogram = spdk_histogram_data_alloc();
if (job->histogram == NULL) {
fprintf(stderr, "Failed to allocate histogram\n");
bdevperf_job_free(job);
return -ENOMEM;
}
TAILQ_INIT(&job->task_list);
task_num = job->queue_depth;
@ -2041,6 +2286,8 @@ bdevperf_parse_arg(int ch, char *arg)
fprintf(stderr, "Illegal zipf theta value %s\n", optarg);
return -EINVAL;
}
} else if (ch == 'l') {
g_latency_display_level++;
} else {
tmp = spdk_strtoll(optarg, 10);
if (tmp < 0) {
@ -2104,6 +2351,7 @@ bdevperf_usage(void)
printf(" -X abort timed out I/O\n");
printf(" -C enable every core to send I/Os to each bdev\n");
printf(" -j <filename> use job config file\n");
printf(" -l display latency histogram, default: disable. -l display summary, -ll display details\n");
}
static int
@ -2216,7 +2464,7 @@ main(int argc, char **argv)
opts.rpc_addr = NULL;
opts.shutdown_cb = spdk_bdevperf_shutdown_cb;
if ((rc = spdk_app_parse_args(argc, argv, &opts, "Zzfq:o:t:w:k:CF:M:P:S:T:Xj:", NULL,
if ((rc = spdk_app_parse_args(argc, argv, &opts, "Zzfq:o:t:w:k:CF:M:P:S:T:Xlj:", NULL,
bdevperf_parse_arg, bdevperf_usage)) !=
SPDK_APP_PARSE_ARGS_SUCCESS) {
return rc;