From 9ba80de0dd1ff42ae70fc9fea9d33f67be78d7ea Mon Sep 17 00:00:00 2001
From: Michal Berger
Date: Thu, 23 Jan 2020 00:57:39 +0100
Subject: [PATCH] test/nvme: Don't wait for spdk_stub0 if stub dies prematurely

If stub terminates right after execution, e.g. due to lack of system
resources (requested number of cpu cores to run on, right amount of
memory, etc.) start_stub() would end up blocking forever since there
wouldn't be any entity around that would mknod spdk_stub0 for it to
break the loop.

Avoid the above scenario by checking if $stubpid is still visible
under procfs and return if it goes missing. To make sure kill_stub()
is still called to clean up after start_stub() declare proper trap
prior to the call to start_stub().

Additionally, avoid potential stderr noise in case kill|wait are told
to act on a PID that's not visible in the ns anymore.

Change-Id: Ief41200c57957f84b4f96a54baabc8da1f27dd43
Signed-off-by: Michal Berger
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/482653
Tested-by: SPDK CI Jenkins
Reviewed-by: Jim Harris
Reviewed-by: Ben Walker
---
 test/common/autotest_common.sh | 17 ++++++++++++++---
 test/nvme/nvme.sh              |  2 +-
 test/nvme/spdk_nvme_cli.sh     |  2 +-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/test/common/autotest_common.sh b/test/common/autotest_common.sh
index 2105cc4cc..b77796400 100644
--- a/test/common/autotest_common.sh
+++ b/test/common/autotest_common.sh
@@ -575,7 +575,7 @@ function rbd_cleanup() {
 	fi
 }
 
-function start_stub() {
+function _start_stub() {
 	# Disable ASLR for multi-process testing. SPDK does support using DPDK multi-process,
 	# but ASLR can still be unreliable in some cases.
 	# We will reenable it again after multi-process testing is complete in kill_stub().
@@ -586,14 +586,25 @@ function start_stub() {
 	stubpid=$!
 	echo Waiting for stub to ready for secondary processes...
 	while ! [ -e /var/run/spdk_stub0 ]; do
+		# If stub dies while we wait, bail
+		[[ -e /proc/$stubpid ]] || return 1
 		sleep 1s
 	done
 	echo done.
 }
 
+function start_stub() {
+	if ! _start_stub "$@"; then
+		echo "stub failed" >&2
+		return 1
+	fi
+}
+
 function kill_stub() {
-	kill $1 $stubpid
-	wait $stubpid
+	if [[ -e /proc/$stubpid ]]; then
+		kill $1 $stubpid
+		wait $stubpid
+	fi 2>/dev/null || :
 	rm -f /var/run/spdk_stub0
 	# Re-enable ASLR now that we are done with multi-process testing
 	# Note: "1" enables ASLR w/o randomizing data segments, "2" adds data segment
diff --git a/test/nvme/nvme.sh b/test/nvme/nvme.sh
index ba153cf8e..5f603ca8c 100755
--- a/test/nvme/nvme.sh
+++ b/test/nvme/nvme.sh
@@ -102,8 +102,8 @@ if [ $(uname) = Linux ]; then
 fi
 
 if [ $(uname) = Linux ]; then
-	start_stub "-s 4096 -i 0 -m 0xE"
 	trap "kill_stub -9; exit 1" SIGINT SIGTERM EXIT
+	start_stub "-s 4096 -i 0 -m 0xE"
 fi
 
 run_test "nvme_reset" $testdir/reset/reset -q 64 -w write -s 4096 -t 5
diff --git a/test/nvme/spdk_nvme_cli.sh b/test/nvme/spdk_nvme_cli.sh
index a7f46e55e..ccb179c09 100755
--- a/test/nvme/spdk_nvme_cli.sh
+++ b/test/nvme/spdk_nvme_cli.sh
@@ -18,8 +18,8 @@ if [ ! -d $spdk_nvme_cli ]; then
 fi
 
 if [ $(uname) = Linux ]; then
-	start_stub "-s 2048 -i 0 -m 0xF"
 	trap "kill_stub; exit 1" SIGINT SIGTERM EXIT
+	start_stub "-s 2048 -i 0 -m 0xF"
 fi
 
 # Build against the version of SPDK under test