sw_hotplug: avoid hotplug timeouts

Avoid hotplug application timeouts on machines
with multiple NVMe drives by scaling the app run time
with the number of NVMe drives.
Furthermore, change the way we wait for hotplug
app initialization by using the "perform_tests" RPC,
and the way we wait for its termination by starting
it via the timeout command.

Second part of the series fixing #2201.

Fixes #2201

Change-Id: Id82c8e8f6b9e870a55c4f43a11c755982855deeb
Signed-off-by: Krzysztof Karas <krzysztof.karas@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15965
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
Krzysztof Karas 2022-12-15 14:45:32 +01:00 committed by Tomasz Zawadzki
parent f955f93c3a
commit 95aa1a7337

View File

@ -8,6 +8,9 @@ rootdir=$(readlink -f $testdir/../..)
source $rootdir/scripts/common.sh source $rootdir/scripts/common.sh
source $rootdir/test/common/autotest_common.sh source $rootdir/test/common/autotest_common.sh
export PYTHONPATH="$rootdir/examples/nvme/hotplug/"
rpc_py=$rootdir/scripts/rpc.py
# Pci bus hotplug # Pci bus hotplug
# Helper function to remove/attach cotrollers # Helper function to remove/attach cotrollers
remove_attach_helper() { remove_attach_helper() {
@ -16,12 +19,6 @@ remove_attach_helper() {
local use_bdev=$3 local use_bdev=$3
local dev local dev
# We need to make sure we wait long enough for hotplug to initialize the devices
# and start IO - if we start removing devices before that happens we will end up
# stepping on hotplug's toes forcing it to fail to report proper count of given
# events.
sleep "$hotplug_wait"
while ((hotplug_events--)); do while ((hotplug_events--)); do
for dev in "${nvmes[@]}"; do for dev in "${nvmes[@]}"; do
echo 1 > "/sys/bus/pci/devices/$dev/remove" echo 1 > "/sys/bus/pci/devices/$dev/remove"
@ -72,30 +69,29 @@ remove_attach_helper() {
run_hotplug() { run_hotplug() {
trap 'killprocess $hotplug_pid; exit 1' SIGINT SIGTERM EXIT trap 'killprocess $hotplug_pid; exit 1' SIGINT SIGTERM EXIT
"$SPDK_EXAMPLE_DIR/hotplug" \ test_time=$((hotplug_events * hotplug_wait * nvme_count))
# Hotplug may sometimes hang, so start it via timeout command.
timeout -k 2s $((test_time + hotplug_wait)) "$SPDK_EXAMPLE_DIR/hotplug" \
-i 0 \ -i 0 \
-t $((hotplug_events * hotplug_wait + hotplug_wait * 3)) \ -t $((test_time)) \
-n $((hotplug_events * nvme_count)) \ -n $((hotplug_events * nvme_count)) \
-r $((hotplug_events * nvme_count)) \ -r $((hotplug_events * nvme_count)) \
-l warning & -l warning --wait-for-rpc &
hotplug_pid=$! timeout_pid=$!
hotplug_pid=$(ps -o pid= --ppid "$timeout_pid")
# Make sure Hotplug started before removing and inserting devices.
waitforlisten "$hotplug_pid"
$rpc_py --plugin hotplug_plugin perform_tests
remove_attach_helper "$hotplug_events" "$hotplug_wait" false remove_attach_helper "$hotplug_events" "$hotplug_wait" false
# Wait in case hotplug app is lagging behind
# and kill it, if it hung.
sleep $hotplug_wait
if ! kill -0 "$hotplug_pid"; then
# hotplug already finished, check for the error code.
wait "$hotplug_pid"
else
echo "Killing hotplug application"
killprocess $hotplug_pid
return 1
fi
trap - SIGINT SIGTERM EXIT trap - SIGINT SIGTERM EXIT
# Check timeout return code.
wait "$timeout_pid"
} }
# SPDK target hotplug # SPDK target hotplug