RPC/Bdev: display the per channel IO statistics for required Bdev
Add a new parameter "-c" to display the per-channel IO statistics for the required Bdev. ./scripts/rpc.py bdev_get_iostat -b Malloc0 -h usage: rpc.py [options] bdev_get_iostat [-h] [-b NAME] [-c] optional arguments: -h, --help show this help message and exit -b NAME, --name NAME Name of the Blockdev. Example: Nvme0n1 -c, --per-channel Display per channel IO stats for specified device This gives more intuitive information on how each channel, with its associated thread, processes the IOs on the same Bdev. Please also be aware that the IO statistics are collected from the channel associated with each SPDK thread, so they relate more to the SPDK thread. In the dynamic scheduling case, different SPDK threads could be running on the same Core. In this case, each separate channel's IO statistics are returned to the RPC caller and, if needed, further parsing of the data is required to get the per-Core information, although usually there is one thread per Core. On the other hand, the user could run the framework_get_reactors RPC method to get the relationship between the threads and CPU Cores so as to get precise information about the IOs running on each thread and each Core for the same Bdev. Change-Id: I39d6a2c9faa868e3c1d7fd0fb6e7c020df982585 Signed-off-by: GangCao <gang.cao@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13011 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Mellanox Build Bot Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
This commit is contained in:
parent
f0494649e3
commit
e28e247954
@ -18,6 +18,9 @@ as there is no way of saving original SPDK thread distribution on reactors.
|
||||
New APIs `spdk_bdev_for_each_channel` and `spdk_bdev_for_each_channel_continue` and
|
||||
associated function pointers were added to iterate each channel of the required bdev.
|
||||
|
||||
The RPC `bdev_get_iostat` now allows a user to query the per-channel IO statistics for
|
||||
the required bdev.
|
||||
|
||||
## v22.09
|
||||
|
||||
### accel
|
||||
|
@ -1920,6 +1920,7 @@ specified by name.
|
||||
Name | Optional | Type | Description
|
||||
----------------------- | -------- | ----------- | -----------
|
||||
name | Optional | string | Block device name
|
||||
per_channel | Optional | bool | Display per channel data for specified block device.
|
||||
|
||||
#### Response
|
||||
|
||||
@ -1935,7 +1936,8 @@ Example request:
|
||||
"id": 1,
|
||||
"method": "bdev_get_iostat",
|
||||
"params": {
|
||||
"name": "Nvme0n1"
|
||||
"name": "Nvme0n1",
|
||||
"per_channel": false
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
@ -162,6 +162,7 @@ struct rpc_bdev_get_iostat_ctx {
|
||||
int rc;
|
||||
struct spdk_jsonrpc_request *request;
|
||||
struct spdk_json_write_ctx *w;
|
||||
bool per_channel;
|
||||
};
|
||||
|
||||
struct rpc_bdev_iostat {
|
||||
@ -171,15 +172,25 @@ struct rpc_bdev_iostat {
|
||||
};
|
||||
|
||||
static void
|
||||
rpc_bdev_get_iostat_started(struct rpc_bdev_get_iostat_ctx *ctx)
|
||||
rpc_bdev_get_iostat_started(struct rpc_bdev_get_iostat_ctx *ctx,
|
||||
struct spdk_bdev_desc *desc)
|
||||
{
|
||||
struct spdk_bdev *bdev;
|
||||
|
||||
ctx->w = spdk_jsonrpc_begin_result(ctx->request);
|
||||
|
||||
spdk_json_write_object_begin(ctx->w);
|
||||
spdk_json_write_named_uint64(ctx->w, "tick_rate", spdk_get_ticks_hz());
|
||||
spdk_json_write_named_uint64(ctx->w, "ticks", spdk_get_ticks());
|
||||
|
||||
spdk_json_write_named_array_begin(ctx->w, "bdevs");
|
||||
if (ctx->per_channel == false) {
|
||||
spdk_json_write_named_array_begin(ctx->w, "bdevs");
|
||||
} else {
|
||||
bdev = spdk_bdev_desc_get_bdev(desc);
|
||||
|
||||
spdk_json_write_named_string(ctx->w, "name", spdk_bdev_get_name(bdev));
|
||||
spdk_json_write_named_array_begin(ctx->w, "channels");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -206,10 +217,8 @@ rpc_bdev_get_iostat_done(struct rpc_bdev_get_iostat_ctx *ctx)
|
||||
|
||||
static void
|
||||
rpc_bdev_get_iostat_dump(struct spdk_json_write_ctx *w,
|
||||
struct spdk_bdev *bdev,
|
||||
struct spdk_bdev_io_stat *stat)
|
||||
{
|
||||
spdk_json_write_named_string(w, "name", spdk_bdev_get_name(bdev));
|
||||
spdk_json_write_named_uint64(w, "bytes_read", stat->bytes_read);
|
||||
spdk_json_write_named_uint64(w, "num_read_ops", stat->num_read_ops);
|
||||
spdk_json_write_named_uint64(w, "bytes_written", stat->bytes_written);
|
||||
@ -240,7 +249,9 @@ rpc_bdev_get_iostat_cb(struct spdk_bdev *bdev,
|
||||
|
||||
spdk_json_write_object_begin(w);
|
||||
|
||||
rpc_bdev_get_iostat_dump(w, bdev, stat);
|
||||
spdk_json_write_named_string(w, "name", spdk_bdev_get_name(bdev));
|
||||
|
||||
rpc_bdev_get_iostat_dump(w, stat);
|
||||
|
||||
if (spdk_bdev_get_qd_sampling_period(bdev)) {
|
||||
spdk_json_write_named_uint64(w, "queue_depth_polling_period",
|
||||
@ -265,6 +276,7 @@ done:
|
||||
|
||||
struct rpc_bdev_get_iostat {
|
||||
char *name;
|
||||
bool per_channel;
|
||||
};
|
||||
|
||||
static void
|
||||
@ -275,6 +287,7 @@ free_rpc_bdev_get_iostat(struct rpc_bdev_get_iostat *r)
|
||||
|
||||
static const struct spdk_json_object_decoder rpc_bdev_get_iostat_decoders[] = {
|
||||
{"name", offsetof(struct rpc_bdev_get_iostat, name), spdk_json_decode_string, true},
|
||||
{"per_channel", offsetof(struct rpc_bdev_get_iostat, per_channel), spdk_json_decode_bool, true},
|
||||
};
|
||||
|
||||
static int
|
||||
@ -304,6 +317,36 @@ _bdev_get_device_stat(void *_ctx, struct spdk_bdev *bdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Completion callback handed to spdk_bdev_for_each_channel() for the
 * per channel variant of bdev_get_iostat.
 *
 * ctx is the struct rpc_bdev_iostat that was passed to the iteration. This
 * callback calls rpc_bdev_get_iostat_done() on the RPC context it points to,
 * closes the bdev descriptor stored in it and frees it.
 *
 * bdev and status are not used here.
 * NOTE(review): status is ignored, so a failed iteration is finished the same
 * way as a successful one - confirm this is intended.
 */
static void
|
||||
rpc_bdev_get_per_channel_stat_done(struct spdk_bdev *bdev, void *ctx, int status)
|
||||
{
|
||||
struct rpc_bdev_iostat *_stat = ctx;
|
||||

|
rpc_bdev_get_iostat_done(_stat->ctx);
|
||||

|
spdk_bdev_close(_stat->desc);
|
||||

|
free(_stat);
|
||||
}
|
||||
|
||||
/*
 * Per channel callback handed to spdk_bdev_for_each_channel().
 *
 * Reads the IO statistics of channel ch on bdev into a local
 * struct spdk_bdev_io_stat and writes them as one JSON object to the write
 * context of the RPC (ctx is the struct rpc_bdev_iostat of the request).
 * Each object starts with "thread_id", taken from the SPDK thread this
 * callback is currently running on, followed by the fields emitted by
 * rpc_bdev_get_iostat_dump().
 *
 * The iteration is always continued with status 0.
 * NOTE(review): presumably the callback runs on the thread that owns ch, which
 * is what makes spdk_get_thread() the right source for thread_id - confirm
 * against the spdk_bdev_for_each_channel() documentation.
 */
static void
|
||||
rpc_bdev_get_per_channel_stat(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
|
||||
struct spdk_io_channel *ch, void *ctx)
|
||||
{
|
||||
struct rpc_bdev_iostat *_stat = ctx;
|
||||
struct spdk_json_write_ctx *w = _stat->ctx->w;
|
||||
struct spdk_bdev_io_stat stat;
|
||||

|
spdk_bdev_get_io_stat(bdev, ch, &stat);
|
||||

|
spdk_json_write_object_begin(w);
|
||||
spdk_json_write_named_uint64(w, "thread_id", spdk_thread_get_id(spdk_get_thread()));
|
||||
rpc_bdev_get_iostat_dump(w, &stat);
|
||||
spdk_json_write_object_end(w);
|
||||

|
/* Hand control back to the iterator so the next channel gets visited. */
spdk_bdev_for_each_channel_continue(i, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
||||
const struct spdk_json_val *params)
|
||||
@ -325,6 +368,13 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.per_channel == true && !req.name) {
|
||||
SPDK_ERRLOG("Bdev name is required for per channel IO statistics\n");
|
||||
spdk_jsonrpc_send_error_response(request, -EINVAL, spdk_strerror(EINVAL));
|
||||
free_rpc_bdev_get_iostat(&req);
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.name) {
|
||||
rc = spdk_bdev_open_ext(req.name, false, dummy_bdev_event_cb, NULL, &desc);
|
||||
if (rc != 0) {
|
||||
@ -351,6 +401,7 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
||||
*/
|
||||
ctx->bdev_count++;
|
||||
ctx->request = request;
|
||||
ctx->per_channel = req.per_channel;
|
||||
|
||||
if (desc != NULL) {
|
||||
_stat = calloc(1, sizeof(struct rpc_bdev_iostat));
|
||||
@ -364,8 +415,15 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
||||
|
||||
ctx->bdev_count++;
|
||||
_stat->ctx = ctx;
|
||||
spdk_bdev_get_device_stat(spdk_bdev_desc_get_bdev(desc), &_stat->stat,
|
||||
rpc_bdev_get_iostat_cb, _stat);
|
||||
if (req.per_channel == false) {
|
||||
spdk_bdev_get_device_stat(spdk_bdev_desc_get_bdev(desc), &_stat->stat,
|
||||
rpc_bdev_get_iostat_cb, _stat);
|
||||
} else {
|
||||
spdk_bdev_for_each_channel(spdk_bdev_desc_get_bdev(desc),
|
||||
rpc_bdev_get_per_channel_stat,
|
||||
_stat,
|
||||
rpc_bdev_get_per_channel_stat_done);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rc = spdk_for_each_bdev(ctx, _bdev_get_device_stat);
|
||||
@ -376,11 +434,11 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
||||
|
||||
if (ctx->rc == 0) {
|
||||
/* We want to fail the RPC for all failures. The callback function to
|
||||
* spdk_bdev_get_device_stat() is executed after stack unwinding if successful.
|
||||
* Hence defer starting RPC response until it is ensured that all
|
||||
* spdk_bdev_get_device_stat() calls will succeed or there is no bdev.
|
||||
* spdk_bdev_for_each_channel() is executed after stack unwinding if
|
||||
* successful. Hence defer starting RPC response until it is ensured that
|
||||
* all spdk_bdev_for_each_channel() calls will succeed or there is no bdev.
|
||||
*/
|
||||
rpc_bdev_get_iostat_started(ctx);
|
||||
rpc_bdev_get_iostat_started(ctx, desc);
|
||||
}
|
||||
|
||||
rpc_bdev_get_iostat_done(ctx);
|
||||
|
@ -1467,11 +1467,12 @@ def bdev_get_bdevs(client, name=None, timeout=None):
|
||||
return client.call('bdev_get_bdevs', params)
|
||||
|
||||
|
||||
def bdev_get_iostat(client, name=None):
|
||||
def bdev_get_iostat(client, name=None, per_channel=None):
|
||||
"""Get I/O statistics for block devices.
|
||||
|
||||
Args:
|
||||
name: bdev name to query (optional; if omitted, query all bdevs)
|
||||
per_channel: display per channel IO stats for specified bdev
|
||||
|
||||
Returns:
|
||||
I/O statistics for the requested block devices.
|
||||
@ -1479,6 +1480,8 @@ def bdev_get_iostat(client, name=None):
|
||||
params = {}
|
||||
if name:
|
||||
params['name'] = name
|
||||
if per_channel:
|
||||
params['per_channel'] = per_channel
|
||||
return client.call('bdev_get_iostat', params)
|
||||
|
||||
|
||||
|
@ -1094,11 +1094,14 @@ if __name__ == "__main__":
|
||||
|
||||
def bdev_get_iostat(args):
|
||||
print_dict(rpc.bdev.bdev_get_iostat(args.client,
|
||||
name=args.name))
|
||||
name=args.name,
|
||||
per_channel=args.per_channel))
|
||||
|
||||
p = subparsers.add_parser('bdev_get_iostat',
|
||||
help='Display current I/O statistics of all the blockdevs or required blockdev.')
|
||||
p.add_argument('-b', '--name', help="Name of the Blockdev. Example: Nvme0n1", required=False)
|
||||
p.add_argument('-c', '--per-channel', default=False, dest='per_channel', help='Display per channel IO stats for specified device',
|
||||
action='store_true', required=False)
|
||||
p.set_defaults(func=bdev_get_iostat)
|
||||
|
||||
def bdev_enable_histogram(args):
|
||||
|
@ -488,6 +488,61 @@ function qd_sampling_test_suite() {
|
||||
trap - SIGINT SIGTERM EXIT
|
||||
}
|
||||
|
||||
# Cross-check the per channel IO statistics of a bdev against its device
# level statistics.
#
# Arguments:
#   $1 - name of the bdev to query
#
# Takes three samples through $rpc_py bdev_get_iostat: device level, per
# channel (-c), device level again. The num_read_ops of channels[0] and
# channels[1] are summed and must lie between the two device level values.
# On failure the function prints a message, deletes $STAT_DEV, kills
# $STAT_PID and exits with status 1.
#
# Only the first two channels are summed; this assumes exactly two channels
# exist (stat_test_suite starts bdevperf with -m 0x3) - TODO confirm.
function stat_function_test() {
|
||||
local bdev_name=$1
|
||||
local iostats
|
||||
local io_count1
|
||||
local io_count2
|
||||
local iostats_per_channel
|
||||
local io_count_per_channel1
|
||||
local io_count_per_channel2
|
||||
local io_count_per_channel_all=0
|
||||

|
iostats=$($rpc_py bdev_get_iostat -b $bdev_name)
|
||||
io_count1=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats")
|
||||

|
iostats_per_channel=$($rpc_py bdev_get_iostat -b $bdev_name -c)
|
||||
io_count_per_channel1=$(jq -r '.channels[0].num_read_ops' <<< "$iostats_per_channel")
|
||||
io_count_per_channel_all=$((io_count_per_channel_all + io_count_per_channel1))
|
||||
io_count_per_channel2=$(jq -r '.channels[1].num_read_ops' <<< "$iostats_per_channel")
|
||||
io_count_per_channel_all=$((io_count_per_channel_all + io_count_per_channel2))
|
||||

|
iostats=$($rpc_py bdev_get_iostat -b $bdev_name)
|
||||
io_count2=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats")
|
||||

|
# Little time passes between the three iostat collections, so the
|
||||
# accumulated per channel count shall be no smaller than the count of the
|
||||
# first run and no larger than the count of the third run.
|
||||
if [ $io_count_per_channel_all -lt $io_count1 ] || [ $io_count_per_channel_all -gt $io_count2 ]; then
|
||||
echo "Failed to collect the per Core IO statistics"
|
||||
$rpc_py bdev_malloc_delete $STAT_DEV
|
||||
killprocess $STAT_PID
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Test suite for the bdev IO statistics RPC.
#
# Starts bdevperf in the background, creates the malloc bdev $STAT_DEV,
# triggers a bdevperf run and, while it is in progress, calls
# stat_function_test on $STAT_DEV. Afterwards the bdev is deleted, bdevperf
# is killed and the trap installed below is cleared.
#
# Sets the globals STAT_DEV and STAT_PID, which stat_function_test also uses
# on its failure path. Reads "$env_ctx", $testdir, $rootdir and $rpc_py from
# the surrounding script.
function stat_test_suite() {
|
||||
STAT_DEV="Malloc_STAT"
|
||||

|
# Run bdevperf with 2 cores so as to collect per Core IO statistics
|
||||
"$testdir/bdevperf/bdevperf" -z -m 0x3 -q 256 -o 4096 -w randread -t 10 -C "$env_ctx" &
|
||||
STAT_PID=$!
|
||||
echo "Process Bdev IO statistics testing pid: $STAT_PID"
|
||||
# Make sure bdevperf does not outlive the test if it is interrupted or exits early.
trap 'cleanup; killprocess $STAT_PID; exit 1' SIGINT SIGTERM EXIT
|
||||
waitforlisten $STAT_PID
|
||||

|
$rpc_py bdev_malloc_create -b $STAT_DEV 128 512
|
||||
waitforbdev $STAT_DEV
|
||||

|
$rootdir/test/bdev/bdevperf/bdevperf.py perform_tests &
|
||||
# Presumably gives the background run time to start issuing IO before the
# statistics are sampled - TODO confirm.
sleep 2
|
||||
stat_function_test $STAT_DEV
|
||||

|
$rpc_py bdev_malloc_delete $STAT_DEV
|
||||
killprocess $STAT_PID
|
||||
trap - SIGINT SIGTERM EXIT
|
||||
}
|
||||
|
||||
# Initial bdev creation and configuration
|
||||
#-----------------------------------------------------
|
||||
QOS_DEV_1="Malloc_0"
|
||||
@ -609,6 +664,7 @@ if [[ $test_type == bdev ]]; then
|
||||
run_test "bdev_qos" qos_test_suite "$env_ctx"
|
||||
run_test "bdev_qd_sampling" qd_sampling_test_suite "$env_ctx"
|
||||
run_test "bdev_error" error_test_suite "$env_ctx"
|
||||
run_test "bdev_stat" stat_test_suite "$env_ctx"
|
||||
fi
|
||||
|
||||
# Temporarily disabled - infinite loop
|
||||
|
Loading…
Reference in New Issue
Block a user