RPC/Bdev: display the per channel IO statistics for required Bdev

Add a new parameter "-c" to display the per channel IO statistics
for required Bdev

./scripts/rpc.py bdev_get_iostat -b Malloc0 -h
usage: rpc.py [options] bdev_get_iostat [-h] [-b NAME] [-c]

optional arguments:
  -h, --help            show this help message and exit
  -b NAME, --name NAME  Name of the Blockdev. Example: Nvme0n1
  -c, --per-channel     Display per channel IO stats for specified device

This could give more intuitive information on each channel's processing
of the IOs with the associated thread on the same Bdev.

Please also be aware that the IO statistics are collected from the
channel associated with each SPDK thread, so they are more closely
related to the SPDK thread. In the dynamic scheduling case, different
SPDK threads could be running on the same Core.

In this case, each separate channel's IO statistics are returned to
the RPC call and, if needed, further parsing of the data is required to
get the per Core information, although usually there is one thread
per Core.

On the other hand, the user could run the framework_get_reactors RPC
method to get the relationship between the threads and CPU Cores so as
to get precise information about the IO running on each thread and
each Core for the same Bdev.

Change-Id: I39d6a2c9faa868e3c1d7fd0fb6e7c020df982585
Signed-off-by: GangCao <gang.cao@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13011
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
This commit is contained in:
GangCao 2022-06-10 04:50:35 -04:00 committed by Tomasz Zawadzki
parent f0494649e3
commit e28e247954
6 changed files with 139 additions and 14 deletions

View File

@ -18,6 +18,9 @@ as there is no way of saving original SPDK thread distribution on reactors.
New APIs `spdk_bdev_for_each_channel` and `spdk_bdev_for_each_channel_continue` and
associated function pointers were added to iterate each channel of the required bdev.
The RPC `bdev_get_iostat` now allows a user to query the per channel IO statistics for
required bdev.
## v22.09
### accel

View File

@ -1920,6 +1920,7 @@ specified by name.
Name | Optional | Type | Description
----------------------- | -------- | ----------- | -----------
name | Optional | string | Block device name
per_channel | Optional | bool | Display per channel data for specified block device.
#### Response
@ -1935,7 +1936,8 @@ Example request:
"id": 1,
"method": "bdev_get_iostat",
"params": {
"name": "Nvme0n1"
"name": "Nvme0n1",
"per_channel": false
}
}
~~~

View File

@ -162,6 +162,7 @@ struct rpc_bdev_get_iostat_ctx {
int rc;
struct spdk_jsonrpc_request *request;
struct spdk_json_write_ctx *w;
bool per_channel;
};
struct rpc_bdev_iostat {
@ -171,15 +172,25 @@ struct rpc_bdev_iostat {
};
static void
rpc_bdev_get_iostat_started(struct rpc_bdev_get_iostat_ctx *ctx)
rpc_bdev_get_iostat_started(struct rpc_bdev_get_iostat_ctx *ctx,
struct spdk_bdev_desc *desc)
{
struct spdk_bdev *bdev;
ctx->w = spdk_jsonrpc_begin_result(ctx->request);
spdk_json_write_object_begin(ctx->w);
spdk_json_write_named_uint64(ctx->w, "tick_rate", spdk_get_ticks_hz());
spdk_json_write_named_uint64(ctx->w, "ticks", spdk_get_ticks());
spdk_json_write_named_array_begin(ctx->w, "bdevs");
if (ctx->per_channel == false) {
spdk_json_write_named_array_begin(ctx->w, "bdevs");
} else {
bdev = spdk_bdev_desc_get_bdev(desc);
spdk_json_write_named_string(ctx->w, "name", spdk_bdev_get_name(bdev));
spdk_json_write_named_array_begin(ctx->w, "channels");
}
}
static void
@ -206,10 +217,8 @@ rpc_bdev_get_iostat_done(struct rpc_bdev_get_iostat_ctx *ctx)
static void
rpc_bdev_get_iostat_dump(struct spdk_json_write_ctx *w,
struct spdk_bdev *bdev,
struct spdk_bdev_io_stat *stat)
{
spdk_json_write_named_string(w, "name", spdk_bdev_get_name(bdev));
spdk_json_write_named_uint64(w, "bytes_read", stat->bytes_read);
spdk_json_write_named_uint64(w, "num_read_ops", stat->num_read_ops);
spdk_json_write_named_uint64(w, "bytes_written", stat->bytes_written);
@ -240,7 +249,9 @@ rpc_bdev_get_iostat_cb(struct spdk_bdev *bdev,
spdk_json_write_object_begin(w);
rpc_bdev_get_iostat_dump(w, bdev, stat);
spdk_json_write_named_string(w, "name", spdk_bdev_get_name(bdev));
rpc_bdev_get_iostat_dump(w, stat);
if (spdk_bdev_get_qd_sampling_period(bdev)) {
spdk_json_write_named_uint64(w, "queue_depth_polling_period",
@ -265,6 +276,7 @@ done:
/* Decoded parameters of the bdev_get_iostat RPC request. */
struct rpc_bdev_get_iostat {
/* Value of the "name" parameter: the block device to query. */
char *name;
/* Value of the "per_channel" parameter: request per channel statistics. */
bool per_channel;
};
static void
@ -275,6 +287,7 @@ free_rpc_bdev_get_iostat(struct rpc_bdev_get_iostat *r)
/*
 * JSON object decoders for the bdev_get_iostat parameters. Each entry maps a
 * key to a field of struct rpc_bdev_get_iostat and the function that decodes it.
 * NOTE(review): the trailing `true` presumably marks the key as optional, which
 * matches the RPC documentation for both parameters - confirm against
 * struct spdk_json_object_decoder.
 */
static const struct spdk_json_object_decoder rpc_bdev_get_iostat_decoders[] = {
{"name", offsetof(struct rpc_bdev_get_iostat, name), spdk_json_decode_string, true},
{"per_channel", offsetof(struct rpc_bdev_get_iostat, per_channel), spdk_json_decode_bool, true},
};
static int
@ -304,6 +317,36 @@ _bdev_get_device_stat(void *_ctx, struct spdk_bdev *bdev)
return 0;
}
/*
 * Completion callback handed to spdk_bdev_for_each_channel() for the per
 * channel iostat query.
 *
 * ctx is the struct rpc_bdev_iostat of the request. The callback calls
 * rpc_bdev_get_iostat_done() on the RPC context it points to, closes the bdev
 * descriptor stored in it and then frees it. bdev and status are not used.
 */
static void
rpc_bdev_get_per_channel_stat_done(struct spdk_bdev *bdev, void *ctx, int status)
{
	struct rpc_bdev_iostat *iostat = ctx;

	rpc_bdev_get_iostat_done(iostat->ctx);

	/* The descriptor is read before the structure holding it is released. */
	spdk_bdev_close(iostat->desc);
	free(iostat);
}
static void
rpc_bdev_get_per_channel_stat(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
struct spdk_io_channel *ch, void *ctx)
{
struct rpc_bdev_iostat *_stat = ctx;
struct spdk_json_write_ctx *w = _stat->ctx->w;
struct spdk_bdev_io_stat stat;
spdk_bdev_get_io_stat(bdev, ch, &stat);
spdk_json_write_object_begin(w);
spdk_json_write_named_uint64(w, "thread_id", spdk_thread_get_id(spdk_get_thread()));
rpc_bdev_get_iostat_dump(w, &stat);
spdk_json_write_object_end(w);
spdk_bdev_for_each_channel_continue(i, 0);
}
static void
rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
const struct spdk_json_val *params)
@ -325,6 +368,13 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
return;
}
if (req.per_channel == true && !req.name) {
SPDK_ERRLOG("Bdev name is required for per channel IO statistics\n");
spdk_jsonrpc_send_error_response(request, -EINVAL, spdk_strerror(EINVAL));
free_rpc_bdev_get_iostat(&req);
return;
}
if (req.name) {
rc = spdk_bdev_open_ext(req.name, false, dummy_bdev_event_cb, NULL, &desc);
if (rc != 0) {
@ -351,6 +401,7 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
*/
ctx->bdev_count++;
ctx->request = request;
ctx->per_channel = req.per_channel;
if (desc != NULL) {
_stat = calloc(1, sizeof(struct rpc_bdev_iostat));
@ -364,8 +415,15 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
ctx->bdev_count++;
_stat->ctx = ctx;
spdk_bdev_get_device_stat(spdk_bdev_desc_get_bdev(desc), &_stat->stat,
rpc_bdev_get_iostat_cb, _stat);
if (req.per_channel == false) {
spdk_bdev_get_device_stat(spdk_bdev_desc_get_bdev(desc), &_stat->stat,
rpc_bdev_get_iostat_cb, _stat);
} else {
spdk_bdev_for_each_channel(spdk_bdev_desc_get_bdev(desc),
rpc_bdev_get_per_channel_stat,
_stat,
rpc_bdev_get_per_channel_stat_done);
}
}
} else {
rc = spdk_for_each_bdev(ctx, _bdev_get_device_stat);
@ -376,11 +434,11 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
if (ctx->rc == 0) {
/* We want to fail the RPC for all failures. The callback function to
* spdk_bdev_get_device_stat() is executed after stack unwinding if successful.
* Hence defer starting RPC response until it is ensured that all
* spdk_bdev_get_device_stat() calls will succeed or there is no bdev.
* spdk_bdev_for_each_channel() is executed after stack unwinding if
* successful. Hence defer starting RPC response until it is ensured that
* all spdk_bdev_for_each_channel() calls will succeed or there is no bdev.
*/
rpc_bdev_get_iostat_started(ctx);
rpc_bdev_get_iostat_started(ctx, desc);
}
rpc_bdev_get_iostat_done(ctx);

View File

@ -1467,11 +1467,12 @@ def bdev_get_bdevs(client, name=None, timeout=None):
return client.call('bdev_get_bdevs', params)
def bdev_get_iostat(client, name=None):
def bdev_get_iostat(client, name=None, per_channel=None):
"""Get I/O statistics for block devices.
Args:
name: bdev name to query (optional; if omitted, query all bdevs)
per_channel: display per channel IO stats for specified bdev
Returns:
I/O statistics for the requested block devices.
@ -1479,6 +1480,8 @@ def bdev_get_iostat(client, name=None):
params = {}
if name:
params['name'] = name
if per_channel:
params['per_channel'] = per_channel
return client.call('bdev_get_iostat', params)

View File

@ -1094,11 +1094,14 @@ if __name__ == "__main__":
def bdev_get_iostat(args):
print_dict(rpc.bdev.bdev_get_iostat(args.client,
name=args.name))
name=args.name,
per_channel=args.per_channel))
p = subparsers.add_parser('bdev_get_iostat',
help='Display current I/O statistics of all the blockdevs or required blockdev.')
p.add_argument('-b', '--name', help="Name of the Blockdev. Example: Nvme0n1", required=False)
p.add_argument('-c', '--per-channel', default=False, dest='per_channel', help='Display per channel IO stats for specified device',
action='store_true', required=False)
p.set_defaults(func=bdev_get_iostat)
def bdev_enable_histogram(args):

View File

@ -488,6 +488,61 @@ function qd_sampling_test_suite() {
trap - SIGINT SIGTERM EXIT
}
# Check that the per channel I/O statistics of a bdev are consistent with its
# aggregated statistics.
#   $1 - name of the bdev to query
# Collects the aggregated stats, then the per channel stats (-c), then the
# aggregated stats again, and requires the summed per channel read count to lie
# between the two aggregated read counts. On failure the bdev $STAT_DEV is
# deleted, the process $STAT_PID is killed and the script exits with status 1.
function stat_function_test() {
	local bdev_name=$1
	local iostats
	local io_count1
	local io_count2
	local iostats_per_channel
	local io_count_per_channel_all

	iostats=$($rpc_py bdev_get_iostat -b "$bdev_name")
	io_count1=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats")

	iostats_per_channel=$($rpc_py bdev_get_iostat -b "$bdev_name" -c)
	# Sum over every reported channel instead of assuming exactly two of them.
	# "// 0" keeps the result numeric when no channel is reported.
	io_count_per_channel_all=$(jq -r '[.channels[]?.num_read_ops] | add // 0' <<< "$iostats_per_channel")

	iostats=$($rpc_py bdev_get_iostat -b "$bdev_name")
	io_count2=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats")

	# Little time passes between the three iostats collections, so the
	# accumulated statistics from the per channel data shall be no smaller than
	# the first run and no bigger than the third run.
	if [ "$io_count_per_channel_all" -lt "$io_count1" ] || [ "$io_count_per_channel_all" -gt "$io_count2" ]; then
		echo "Failed to collect the per Core IO statistics"
		$rpc_py bdev_malloc_delete $STAT_DEV
		killprocess $STAT_PID
		exit 1
	fi
}
# Test suite for the bdev I/O statistics RPC: starts bdevperf, creates a malloc
# bdev, kicks off the I/O run and checks the per channel statistics while it is
# in progress. Sets the globals STAT_DEV and STAT_PID, which
# stat_function_test reads on its failure path.
function stat_test_suite() {
STAT_DEV="Malloc_STAT"
# Run bdevperf with 2 cores so as to collect per Core IO statistics
"$testdir/bdevperf/bdevperf" -z -m 0x3 -q 256 -o 4096 -w randread -t 10 -C "$env_ctx" &
STAT_PID=$!
echo "Process Bdev IO statistics testing pid: $STAT_PID"
# Until the suite finishes, any interruption or exit cleans up, kills bdevperf
# and reports failure.
trap 'cleanup; killprocess $STAT_PID; exit 1' SIGINT SIGTERM EXIT
waitforlisten $STAT_PID
$rpc_py bdev_malloc_create -b $STAT_DEV 128 512
waitforbdev $STAT_DEV
# Start the I/O run in the background so the statistics can be queried while it
# is in progress.
$rootdir/test/bdev/bdevperf/bdevperf.py perform_tests &
# NOTE(review): presumably gives bdevperf time to issue some reads before the
# statistics are sampled - confirm.
sleep 2
stat_function_test $STAT_DEV
$rpc_py bdev_malloc_delete $STAT_DEV
killprocess $STAT_PID
# Normal completion: drop the failure trap installed above.
trap - SIGINT SIGTERM EXIT
}
# Initial bdev creation and configuration
#-----------------------------------------------------
QOS_DEV_1="Malloc_0"
@ -609,6 +664,7 @@ if [[ $test_type == bdev ]]; then
run_test "bdev_qos" qos_test_suite "$env_ctx"
run_test "bdev_qd_sampling" qd_sampling_test_suite "$env_ctx"
run_test "bdev_error" error_test_suite "$env_ctx"
run_test "bdev_stat" stat_test_suite "$env_ctx"
fi
# Temporarily disabled - infinite loop