From 95e057210e40845b91137c724d16a46b135e1da4 Mon Sep 17 00:00:00 2001 From: Monica Kenguva Date: Wed, 6 Apr 2022 21:49:06 +0000 Subject: [PATCH] test/nvmf: test reconnect_delay_sec parameter Signed-off-by: Monica Kenguva Change-Id: I45dc2b2fe660e5a53c8976dea2640ea53ec00a3d Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12184 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- module/bdev/nvme/bdev_nvme.c | 3 +++ scripts/bpf/nvmf_timeout.bt | 17 ++++++++++++++ test/nvmf/host/timeout.sh | 44 +++++++++++++++++++++++++++++++++++- test/nvmf/nvmf.sh | 4 +++- 4 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 scripts/bpf/nvmf_timeout.bt diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 0110694c9..32f4a82e0 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -1591,6 +1591,7 @@ bdev_nvme_reconnect_delay_timer_expired(void *ctx) { struct nvme_ctrlr *nvme_ctrlr = ctx; + SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name); pthread_mutex_lock(&nvme_ctrlr->mutex); spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer); @@ -1783,6 +1784,7 @@ bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr) { spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr); + SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name); assert(nvme_ctrlr->reset_detach_poller == NULL); nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll, nvme_ctrlr, 0); @@ -1793,6 +1795,7 @@ bdev_nvme_reset_ctrlr(struct spdk_io_channel_iter *i, int status) { struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i); + SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name); assert(status == 0); if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) { diff --git a/scripts/bpf/nvmf_timeout.bt b/scripts/bpf/nvmf_timeout.bt new file mode 100644 index 000000000..6a3443579 --- /dev/null +++ b/scripts/bpf/nvmf_timeout.bt @@ -0,0 +1,17 @@ +usdt:__EXE__:bdev_nvme_ctrlr_reconnect { + printf("%d.%06d: reconnect bdev controller %s\n", + elapsed / (uint64)(1000 * 1000), elapsed % (uint64)(1000 * 1000), + str(arg1)); +} + +usdt:__EXE__:bdev_nvme_ctrlr_reconnect_delay { + printf("%d.%06d: reconnect delay bdev controller %s\n", + elapsed / (uint64)(1000 * 1000), elapsed % (uint64)(1000 * 1000), + str(arg1)); +} + +usdt:__EXE__:bdev_nvme_ctrlr_reset { + printf("%d.%06d: reset bdev controller %s\n", + elapsed / (uint64)(1000 * 1000), elapsed % (uint64)(1000 * 1000), + str(arg1)); +} diff --git a/test/nvmf/host/timeout.sh b/test/nvmf/host/timeout.sh index ff2d830e9..b5e46dbd3 100755 --- a/test/nvmf/host/timeout.sh +++ b/test/nvmf/host/timeout.sh @@ -9,6 +9,7 @@ MALLOC_BDEV_SIZE=64 MALLOC_BLOCK_SIZE=512 rpc_py="$rootdir/scripts/rpc.py" +bpf_sh="$rootdir/scripts/bpftrace.sh" bdevperf_rpc_sock=/var/tmp/bdevperf.sock @@ -36,7 +37,7 @@ function get_controller() { $rpc_py -s $bdevperf_rpc_sock bdev_nvme_get_controllers | jq -r '.[].name' } -# case 1 test ctrlr_loss_timeout_sec time to try reconnecting to a ctrlr before deleting it +# Case 1 test ctrlr_loss_timeout_sec time to try reconnecting to a ctrlr before deleting it # ctrlr_loss_timeout_sec is 10 reconnect_delay_sec is 5 $rpc_py -s $bdevperf_rpc_sock bdev_nvme_set_options -r -1 $rpc_py -s $bdevperf_rpc_sock bdev_nvme_attach_controller -b NVMe0 -t $TEST_TRANSPORT -a $NVMF_FIRST_TARGET_IP -s $NVMF_PORT -f ipv4 -n nqn.2016-06.io.spdk:cnode1 -l 10 -o 5 @@ -90,6 +91,8 @@ wait $rpc_pid cat $testdir/try.txt cat /dev/null > $testdir/try.txt +# TODO: Check the IO fail if we wait for 5 sec, needs information from bdevperf + $rootdir/test/bdev/bdevperf/bdevperf.py -s $bdevperf_rpc_sock perform_tests & rpc_pid=$! sleep 1 @@ -103,8 +106,47 @@ cat $testdir/try.txt killprocess $bdevperf_pid rm -f $testdir/try.txt +# Case 3 test reconnect_delay_sec +# Time to delay a reconnect trial +$rootdir/test/bdev/bdevperf/bdevperf -m 0x4 -z -r $bdevperf_rpc_sock -q 128 -o 4096 -w verify -t 20 -f &> $testdir/try.txt & +bdevperf_pid=$! + +trap 'process_shm --id $NVMF_APP_SHM_ID; rm -f $testdir/try.txt; killprocess $bdevperf_pid; nvmftestfini; exit 1' SIGINT SIGTERM EXIT +waitforlisten $bdevperf_pid $bdevperf_rpc_sock + +#start_trace +$bpf_sh $bdevperf_pid $rootdir/scripts/bpf/nvmf_timeout.bt &> $testdir/trace.txt & +dtrace_pid=$! + +$rpc_py -s $bdevperf_rpc_sock bdev_nvme_set_options -r -1 + +# ctrlr_loss_timeout_sec is 10 reconnect_delay_sec is 2 +$rpc_py -s $bdevperf_rpc_sock bdev_nvme_attach_controller -b NVMe0 -t $TEST_TRANSPORT -a $NVMF_FIRST_TARGET_IP \ + -s $NVMF_PORT -f ipv4 -n nqn.2016-06.io.spdk:cnode1 -l 10 -o 2 +$rootdir/test/bdev/bdevperf/bdevperf.py -s $bdevperf_rpc_sock perform_tests & +rpc_pid=$! +sleep 1 +$rpc_py nvmf_subsystem_remove_listener nqn.2016-06.io.spdk:cnode1 -t $TEST_TRANSPORT -a $NVMF_FIRST_TARGET_IP -s $NVMF_PORT + +# wait for 5sec to count the reconnect delay frequency with 2 sec interval +sleep 5 +cat $testdir/trace.txt + +# Check the frequency of delay reconnect +if (("$(grep -c "reconnect delay bdev controller NVMe0" < $testdir/trace.txt)" != 2)); then + false +fi + +kill $dtrace_pid +rm -f $testdir/trace.txt +wait $rpc_pid +cat $testdir/try.txt + +killprocess $bdevperf_pid + $rpc_py nvmf_delete_subsystem nqn.2016-06.io.spdk:cnode1 trap - SIGINT SIGTERM EXIT +rm -f $testdir/try.txt nvmftestfini diff --git a/test/nvmf/nvmf.sh b/test/nvmf/nvmf.sh index fdc182ea2..ea9e969ae 100755 --- a/test/nvmf/nvmf.sh +++ b/test/nvmf/nvmf.sh @@ -84,7 +84,9 @@ if [[ $SPDK_TEST_USDT -eq 1 ]]; then run_test "nvmf_multipath" test/nvmf/host/multipath.sh "${TEST_ARGS[@]}" fi run_test "nvmf_discovery" test/nvmf/host/discovery.sh "${TEST_ARGS[@]}" -run_test "nvmf_timeout" test/nvmf/host/timeout.sh "${TEST_ARGS[@]}" +if [[ $SPDK_TEST_USDT -eq 1 ]]; then + run_test "nvmf_timeout" test/nvmf/host/timeout.sh "${TEST_ARGS[@]}" +fi # TODO: disabled due to intermittent failures (RDMA_CM_EVENT_UNREACHABLE/ETIMEDOUT) #run_test test/nvmf/host/identify_kernel_nvmf.sh $TEST_ARGS