diff --git a/examples/nvme/perf/perf.c b/examples/nvme/perf/perf.c index 59c458920..1c5b25046 100644 --- a/examples/nvme/perf/perf.c +++ b/examples/nvme/perf/perf.c @@ -136,6 +136,11 @@ struct ns_worker_stats { uint64_t total_tsc; uint64_t min_tsc; uint64_t max_tsc; + uint64_t last_tsc; + uint64_t busy_tsc; + uint64_t idle_tsc; + uint64_t last_busy_tsc; + uint64_t last_idle_tsc; }; struct ns_worker_ctx { @@ -203,7 +208,7 @@ struct ns_fn_table { int (*submit_io)(struct perf_task *task, struct ns_worker_ctx *ns_ctx, struct ns_entry *entry, uint64_t offset_in_ios); - void (*check_io)(struct ns_worker_ctx *ns_ctx); + int64_t (*check_io)(struct ns_worker_ctx *ns_ctx); void (*verify_io)(struct perf_task *task, struct ns_entry *entry); @@ -231,6 +236,8 @@ static pthread_barrier_t g_worker_sync_barrier; static uint64_t g_tsc_rate; +static bool g_monitor_perf_cores = false; + static uint32_t g_io_align = 0x200; static bool g_io_align_specified; static uint32_t g_io_size_bytes; @@ -447,10 +454,10 @@ uring_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx, return 0; } -static void +static int64_t uring_check_io(struct ns_worker_ctx *ns_ctx) { - int i, count, to_complete, to_submit, ret = 0; + int i, to_complete, to_submit, count = 0, ret = 0; struct perf_task *task; to_submit = ns_ctx->u.uring.io_pending; @@ -460,7 +467,7 @@ uring_check_io(struct ns_worker_ctx *ns_ctx) * It will automatically call spdk_io_uring_enter appropriately. */ ret = io_uring_submit(&ns_ctx->u.uring.ring); if (ret < 0) { - return; + return -1; } ns_ctx->u.uring.io_pending = 0; ns_ctx->u.uring.io_inflight += to_submit; @@ -481,6 +488,7 @@ uring_check_io(struct ns_worker_ctx *ns_ctx) task_complete(task); } } + return count; } static void @@ -580,7 +588,7 @@ aio_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx, } } -static void +static int64_t aio_check_io(struct ns_worker_ctx *ns_ctx) { int count, i; @@ -598,6 +606,7 @@ aio_check_io(struct ns_worker_ctx *ns_ctx) for (i = 0; i < count; i++) { task_complete(ns_ctx->u.aio.events[i].data); } + return count; } static void @@ -884,7 +893,7 @@ perf_disconnect_cb(struct spdk_nvme_qpair *qpair, void *ctx) } -static void +static int64_t nvme_check_io(struct ns_worker_ctx *ns_ctx) { int64_t rc; @@ -894,6 +903,7 @@ nvme_check_io(struct ns_worker_ctx *ns_ctx) fprintf(stderr, "NVMe io qpair process completion error\n"); exit(1); } + return rc; } static void @@ -1404,6 +1414,11 @@ print_periodic_performance(bool warmup) double mb_this_second; struct worker_thread *worker; struct ns_worker_ctx *ns_ctx; + uint64_t busy_tsc; + uint64_t idle_tsc; + uint64_t core_busy_tsc = 0; + uint64_t core_idle_tsc = 0; + double core_busy_perc = 0; if (!isatty(STDOUT_FILENO)) { /* Don't print periodic stats if output is not going @@ -1411,17 +1426,34 @@ print_periodic_performance(bool warmup) */ return; } - io_this_second = 0; TAILQ_FOREACH(worker, &g_workers, link) { + busy_tsc = 0; + idle_tsc = 0; TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { io_this_second += ns_ctx->stats.io_completed - ns_ctx->stats.last_io_completed; ns_ctx->stats.last_io_completed = ns_ctx->stats.io_completed; + + if (g_monitor_perf_cores) { + busy_tsc += ns_ctx->stats.busy_tsc - ns_ctx->stats.last_busy_tsc; + idle_tsc += ns_ctx->stats.idle_tsc - ns_ctx->stats.last_idle_tsc; + ns_ctx->stats.last_busy_tsc = ns_ctx->stats.busy_tsc; + ns_ctx->stats.last_idle_tsc = ns_ctx->stats.idle_tsc; + } + } + if (g_monitor_perf_cores) { + core_busy_tsc += busy_tsc; + core_idle_tsc += idle_tsc; + core_busy_perc += (double)core_busy_tsc / (core_idle_tsc + core_busy_tsc) * 100; } } - mb_this_second = (double)io_this_second * g_io_size_bytes / (1024 * 1024); - printf("%s%9ju IOPS, %8.2f MiB/s\r", warmup ? "[warmup] " : "", io_this_second, mb_this_second); + + printf("%s%9ju IOPS, %8.2f MiB/s", warmup ? "[warmup] " : "", io_this_second, mb_this_second); + if (g_monitor_perf_cores) { + printf("%3d Core(s): %6.2f%% Busy", g_num_workers, core_busy_perc); + } + printf("\r"); fflush(stdout); } @@ -1434,6 +1466,8 @@ work_fn(void *arg) uint32_t unfinished_ns_ctx; bool warmup = false; int rc; + int64_t check_rc; + uint64_t check_now; /* Allocate queue pairs for each namespace. */ TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { @@ -1473,7 +1507,15 @@ work_fn(void *arg) * to replace each I/O that is completed. */ TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { - ns_ctx->entry->fn_table->check_io(ns_ctx); + check_now = spdk_get_ticks(); + check_rc = ns_ctx->entry->fn_table->check_io(ns_ctx); + + if (check_rc > 0) { + ns_ctx->stats.busy_tsc += check_now - ns_ctx->stats.last_tsc; + } else { + ns_ctx->stats.idle_tsc += check_now - ns_ctx->stats.last_tsc; + } + ns_ctx->stats.last_tsc = check_now; } tsc_current = spdk_get_ticks(); @@ -1590,6 +1632,7 @@ static void usage(char *program_name) printf("\t[-A IO buffer alignment. Must be power of 2 and not less than cache line (%u)]\n", SPDK_CACHE_LINE_SIZE); printf("\t[-S set the default sock impl, e.g. \"posix\"]\n"); + printf("\t[-m display real-time overall cpu usage on used cores]\n"); #ifdef SPDK_CONFIG_URING printf("\t[-R enable using liburing to drive kernel devices (Default: libaio)]\n"); #endif @@ -2022,7 +2065,7 @@ parse_args(int argc, char **argv) int rc; while ((op = getopt(argc, argv, - "a:b:c:e:gi:lo:q:r:k:s:t:w:z:A:C:DGHILM:NO:P:Q:RS:T:U:VZ:")) != -1) { + "a:b:c:e:gi:lmo:q:r:k:s:t:w:z:A:C:DGHILM:NO:P:Q:RS:T:U:VZ:")) != -1) { switch (op) { case 'a': case 'A': @@ -2117,6 +2160,9 @@ parse_args(int argc, char **argv) case 'l': g_latency_ssd_tracking_enable = true; break; + case 'm': + g_monitor_perf_cores = true; + break; case 'r': if (add_trid(optarg)) { usage(argv[0]);