diff --git a/CHANGELOG.md b/CHANGELOG.md index c19304307..1e06c5b84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,9 @@ as there is no way of saving original SPDK thread distribution on reactors. New APIs `spdk_bdev_for_each_channel` and `spdk_bdev_for_each_channel_continue` and associated function pointers were added to iterate each channel of the required bdev. +The RPC `bdev_get_iostat` now allows a user to query the per channel IO statistics for +required bdev. + ## v22.09 ### accel diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 05276f850..7f2b0a8d1 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -1920,6 +1920,7 @@ specified by name. Name | Optional | Type | Description ----------------------- | -------- | ----------- | ----------- name | Optional | string | Block device name +per_channel | Optional | bool | Display per channel data for specified block device. #### Response @@ -1935,7 +1936,8 @@ Example request: "id": 1, "method": "bdev_get_iostat", "params": { - "name": "Nvme0n1" + "name": "Nvme0n1", + "per_channel": false } } ~~~ diff --git a/lib/bdev/bdev_rpc.c b/lib/bdev/bdev_rpc.c index 3426b1d93..450cbc598 100644 --- a/lib/bdev/bdev_rpc.c +++ b/lib/bdev/bdev_rpc.c @@ -162,6 +162,7 @@ struct rpc_bdev_get_iostat_ctx { int rc; struct spdk_jsonrpc_request *request; struct spdk_json_write_ctx *w; + bool per_channel; }; struct rpc_bdev_iostat { @@ -171,15 +172,25 @@ struct rpc_bdev_iostat { }; static void -rpc_bdev_get_iostat_started(struct rpc_bdev_get_iostat_ctx *ctx) +rpc_bdev_get_iostat_started(struct rpc_bdev_get_iostat_ctx *ctx, + struct spdk_bdev_desc *desc) { + struct spdk_bdev *bdev; + ctx->w = spdk_jsonrpc_begin_result(ctx->request); spdk_json_write_object_begin(ctx->w); spdk_json_write_named_uint64(ctx->w, "tick_rate", spdk_get_ticks_hz()); spdk_json_write_named_uint64(ctx->w, "ticks", spdk_get_ticks()); - spdk_json_write_named_array_begin(ctx->w, "bdevs"); + if (ctx->per_channel == false) { + 
spdk_json_write_named_array_begin(ctx->w, "bdevs"); + } else { + bdev = spdk_bdev_desc_get_bdev(desc); + + spdk_json_write_named_string(ctx->w, "name", spdk_bdev_get_name(bdev)); + spdk_json_write_named_array_begin(ctx->w, "channels"); + } } static void @@ -206,10 +217,8 @@ rpc_bdev_get_iostat_done(struct rpc_bdev_get_iostat_ctx *ctx) static void rpc_bdev_get_iostat_dump(struct spdk_json_write_ctx *w, - struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat) { - spdk_json_write_named_string(w, "name", spdk_bdev_get_name(bdev)); spdk_json_write_named_uint64(w, "bytes_read", stat->bytes_read); spdk_json_write_named_uint64(w, "num_read_ops", stat->num_read_ops); spdk_json_write_named_uint64(w, "bytes_written", stat->bytes_written); @@ -240,7 +249,9 @@ rpc_bdev_get_iostat_cb(struct spdk_bdev *bdev, spdk_json_write_object_begin(w); - rpc_bdev_get_iostat_dump(w, bdev, stat); + spdk_json_write_named_string(w, "name", spdk_bdev_get_name(bdev)); + + rpc_bdev_get_iostat_dump(w, stat); if (spdk_bdev_get_qd_sampling_period(bdev)) { spdk_json_write_named_uint64(w, "queue_depth_polling_period", @@ -265,6 +276,7 @@ done: struct rpc_bdev_get_iostat { char *name; + bool per_channel; }; static void @@ -275,6 +287,7 @@ free_rpc_bdev_get_iostat(struct rpc_bdev_get_iostat *r) static const struct spdk_json_object_decoder rpc_bdev_get_iostat_decoders[] = { {"name", offsetof(struct rpc_bdev_get_iostat, name), spdk_json_decode_string, true}, + {"per_channel", offsetof(struct rpc_bdev_get_iostat, per_channel), spdk_json_decode_bool, true}, }; static int @@ -304,6 +317,36 @@ _bdev_get_device_stat(void *_ctx, struct spdk_bdev *bdev) return 0; } +static void +rpc_bdev_get_per_channel_stat_done(struct spdk_bdev *bdev, void *ctx, int status) +{ + struct rpc_bdev_iostat *_stat = ctx; + + rpc_bdev_get_iostat_done(_stat->ctx); + + spdk_bdev_close(_stat->desc); + + free(_stat); +} + +static void +rpc_bdev_get_per_channel_stat(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev, + struct 
spdk_io_channel *ch, void *ctx) +{ + struct rpc_bdev_iostat *_stat = ctx; + struct spdk_json_write_ctx *w = _stat->ctx->w; + struct spdk_bdev_io_stat stat; + + spdk_bdev_get_io_stat(bdev, ch, &stat); + + spdk_json_write_object_begin(w); + spdk_json_write_named_uint64(w, "thread_id", spdk_thread_get_id(spdk_get_thread())); + rpc_bdev_get_iostat_dump(w, &stat); + spdk_json_write_object_end(w); + + spdk_bdev_for_each_channel_continue(i, 0); +} + static void rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) @@ -325,6 +368,13 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request, return; } + if (req.per_channel == true && !req.name) { + SPDK_ERRLOG("Bdev name is required for per channel IO statistics\n"); + spdk_jsonrpc_send_error_response(request, -EINVAL, spdk_strerror(EINVAL)); + free_rpc_bdev_get_iostat(&req); + return; + } + if (req.name) { rc = spdk_bdev_open_ext(req.name, false, dummy_bdev_event_cb, NULL, &desc); if (rc != 0) { @@ -351,6 +401,7 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request, */ ctx->bdev_count++; ctx->request = request; + ctx->per_channel = req.per_channel; if (desc != NULL) { _stat = calloc(1, sizeof(struct rpc_bdev_iostat)); @@ -364,8 +415,15 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request, ctx->bdev_count++; _stat->ctx = ctx; - spdk_bdev_get_device_stat(spdk_bdev_desc_get_bdev(desc), &_stat->stat, - rpc_bdev_get_iostat_cb, _stat); + if (req.per_channel == false) { + spdk_bdev_get_device_stat(spdk_bdev_desc_get_bdev(desc), &_stat->stat, + rpc_bdev_get_iostat_cb, _stat); + } else { + spdk_bdev_for_each_channel(spdk_bdev_desc_get_bdev(desc), + rpc_bdev_get_per_channel_stat, + _stat, + rpc_bdev_get_per_channel_stat_done); + } } } else { rc = spdk_for_each_bdev(ctx, _bdev_get_device_stat); @@ -376,11 +434,11 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request, if (ctx->rc == 0) { /* We want to fail the RPC for all failures. 
The callback function to - * spdk_bdev_get_device_stat() is executed after stack unwinding if successful. - * Hence defer starting RPC response until it is ensured that all - * spdk_bdev_get_device_stat() calls will succeed or there is no bdev. + * spdk_bdev_get_device_stat() or spdk_bdev_for_each_channel() is executed + * after stack unwinding if successful. Hence defer starting RPC response until + * it is ensured that all of these calls will succeed or there is no bdev. */ - rpc_bdev_get_iostat_started(ctx); + rpc_bdev_get_iostat_started(ctx, desc); } rpc_bdev_get_iostat_done(ctx); diff --git a/python/spdk/rpc/bdev.py b/python/spdk/rpc/bdev.py index d214ef78d..c0857d32d 100644 --- a/python/spdk/rpc/bdev.py +++ b/python/spdk/rpc/bdev.py @@ -1467,11 +1467,12 @@ def bdev_get_bdevs(client, name=None, timeout=None): return client.call('bdev_get_bdevs', params) -def bdev_get_iostat(client, name=None): +def bdev_get_iostat(client, name=None, per_channel=None): """Get I/O statistics for block devices. Args: name: bdev name to query (optional; if omitted, query all bdevs) + per_channel: display per channel IO stats for specified bdev Returns: I/O statistics for the requested block devices. @@ -1479,6 +1480,8 @@ def bdev_get_iostat(client, name=None): params = {} if name: params['name'] = name + if per_channel: + params['per_channel'] = per_channel return client.call('bdev_get_iostat', params) diff --git a/scripts/rpc.py b/scripts/rpc.py index 7047b66f1..393a7e6dc 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -1094,11 +1094,14 @@ if __name__ == "__main__": def bdev_get_iostat(args): print_dict(rpc.bdev.bdev_get_iostat(args.client, - name=args.name)) + name=args.name, + per_channel=args.per_channel)) p = subparsers.add_parser('bdev_get_iostat', help='Display current I/O statistics of all the blockdevs or required blockdev.') p.add_argument('-b', '--name', help="Name of the Blockdev. 
Example: Nvme0n1", required=False) + p.add_argument('-c', '--per-channel', default=False, dest='per_channel', help='Display per channel IO stats for specified device', + action='store_true', required=False) p.set_defaults(func=bdev_get_iostat) def bdev_enable_histogram(args): diff --git a/test/bdev/blockdev.sh b/test/bdev/blockdev.sh index 30daaf290..6fb52a768 100755 --- a/test/bdev/blockdev.sh +++ b/test/bdev/blockdev.sh @@ -488,6 +488,61 @@ function qd_sampling_test_suite() { trap - SIGINT SIGTERM EXIT } +function stat_function_test() { + local bdev_name=$1 + local iostats + local io_count1 + local io_count2 + local iostats_per_channel + local io_count_per_channel1 + local io_count_per_channel2 + local io_count_per_channel_all=0 + + iostats=$($rpc_py bdev_get_iostat -b $bdev_name) + io_count1=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats") + + iostats_per_channel=$($rpc_py bdev_get_iostat -b $bdev_name -c) + io_count_per_channel1=$(jq -r '.channels[0].num_read_ops' <<< "$iostats_per_channel") + io_count_per_channel_all=$((io_count_per_channel_all + io_count_per_channel1)) + io_count_per_channel2=$(jq -r '.channels[1].num_read_ops' <<< "$iostats_per_channel") + io_count_per_channel_all=$((io_count_per_channel_all + io_count_per_channel2)) + + iostats=$($rpc_py bdev_get_iostat -b $bdev_name) + io_count2=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats") + + # Little time passes between the three iostats collections, so the + # accumulated per channel read count shall be no smaller than the first + # run and no bigger than the third run within this short period of time. 
+ if [ $io_count_per_channel_all -lt $io_count1 ] || [ $io_count_per_channel_all -gt $io_count2 ]; then + echo "Failed to collect the per Core IO statistics" + $rpc_py bdev_malloc_delete $STAT_DEV + killprocess $STAT_PID + exit 1 + fi +} + +function stat_test_suite() { + STAT_DEV="Malloc_STAT" + + # Run bdevperf with 2 cores so as to collect per Core IO statistics + "$testdir/bdevperf/bdevperf" -z -m 0x3 -q 256 -o 4096 -w randread -t 10 -C "$env_ctx" & + STAT_PID=$! + echo "Process Bdev IO statistics testing pid: $STAT_PID" + trap 'cleanup; killprocess $STAT_PID; exit 1' SIGINT SIGTERM EXIT + waitforlisten $STAT_PID + + $rpc_py bdev_malloc_create -b $STAT_DEV 128 512 + waitforbdev $STAT_DEV + + $rootdir/test/bdev/bdevperf/bdevperf.py perform_tests & + sleep 2 + stat_function_test $STAT_DEV + + $rpc_py bdev_malloc_delete $STAT_DEV + killprocess $STAT_PID + trap - SIGINT SIGTERM EXIT +} + # Inital bdev creation and configuration #----------------------------------------------------- QOS_DEV_1="Malloc_0" @@ -609,6 +664,7 @@ if [[ $test_type == bdev ]]; then run_test "bdev_qos" qos_test_suite "$env_ctx" run_test "bdev_qd_sampling" qd_sampling_test_suite "$env_ctx" run_test "bdev_error" error_test_suite "$env_ctx" + run_test "bdev_stat" stat_test_suite "$env_ctx" fi # Temporarily disabled - infinite loop