sw_hotplug: avoid hotplug timeouts
Avoid hotplug application timeouts on machines with multiple NVMe drives by scaling the app run time to the number of NVMe drives. Furthermore, change the way we wait for hotplug app initialization by using the "perform_tests" RPC, and the way we handle its termination by starting it via the timeout command. Second part of the series fixing #2201. Fixes #2201 Change-Id: Id82c8e8f6b9e870a55c4f43a11c755982855deeb Signed-off-by: Krzysztof Karas <krzysztof.karas@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15965 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
parent
f955f93c3a
commit
95aa1a7337
@ -8,6 +8,9 @@ rootdir=$(readlink -f $testdir/../..)
|
|||||||
source $rootdir/scripts/common.sh
|
source $rootdir/scripts/common.sh
|
||||||
source $rootdir/test/common/autotest_common.sh
|
source $rootdir/test/common/autotest_common.sh
|
||||||
|
|
||||||
|
export PYTHONPATH="$rootdir/examples/nvme/hotplug/"
|
||||||
|
rpc_py=$rootdir/scripts/rpc.py
|
||||||
|
|
||||||
# Pci bus hotplug
|
# Pci bus hotplug
|
||||||
# Helper function to remove/attach controllers
|
# Helper function to remove/attach controllers
|
||||||
remove_attach_helper() {
|
remove_attach_helper() {
|
||||||
@ -16,12 +19,6 @@ remove_attach_helper() {
|
|||||||
local use_bdev=$3
|
local use_bdev=$3
|
||||||
local dev
|
local dev
|
||||||
|
|
||||||
# We need to make sure we wait long enough for hotplug to initialize the devices
|
|
||||||
# and start IO - if we start removing devices before that happens we will end up
|
|
||||||
# stepping on hotplug's toes forcing it to fail to report proper count of given
|
|
||||||
# events.
|
|
||||||
sleep "$hotplug_wait"
|
|
||||||
|
|
||||||
while ((hotplug_events--)); do
|
while ((hotplug_events--)); do
|
||||||
for dev in "${nvmes[@]}"; do
|
for dev in "${nvmes[@]}"; do
|
||||||
echo 1 > "/sys/bus/pci/devices/$dev/remove"
|
echo 1 > "/sys/bus/pci/devices/$dev/remove"
|
||||||
@ -72,30 +69,29 @@ remove_attach_helper() {
|
|||||||
run_hotplug() {
|
run_hotplug() {
|
||||||
trap 'killprocess $hotplug_pid; exit 1' SIGINT SIGTERM EXIT
|
trap 'killprocess $hotplug_pid; exit 1' SIGINT SIGTERM EXIT
|
||||||
|
|
||||||
"$SPDK_EXAMPLE_DIR/hotplug" \
|
test_time=$((hotplug_events * hotplug_wait * nvme_count))
|
||||||
|
|
||||||
|
# Hotplug may sometimes hang, so start it via timeout command.
|
||||||
|
timeout -k 2s $((test_time + hotplug_wait)) "$SPDK_EXAMPLE_DIR/hotplug" \
|
||||||
-i 0 \
|
-i 0 \
|
||||||
-t $((hotplug_events * hotplug_wait + hotplug_wait * 3)) \
|
-t $((test_time)) \
|
||||||
-n $((hotplug_events * nvme_count)) \
|
-n $((hotplug_events * nvme_count)) \
|
||||||
-r $((hotplug_events * nvme_count)) \
|
-r $((hotplug_events * nvme_count)) \
|
||||||
-l warning &
|
-l warning --wait-for-rpc &
|
||||||
hotplug_pid=$!
|
timeout_pid=$!
|
||||||
|
hotplug_pid=$(ps -o pid= --ppid "$timeout_pid")
|
||||||
|
|
||||||
|
# Make sure Hotplug started before removing and inserting devices.
|
||||||
|
waitforlisten "$hotplug_pid"
|
||||||
|
|
||||||
|
$rpc_py --plugin hotplug_plugin perform_tests
|
||||||
|
|
||||||
remove_attach_helper "$hotplug_events" "$hotplug_wait" false
|
remove_attach_helper "$hotplug_events" "$hotplug_wait" false
|
||||||
|
|
||||||
# Wait in case hotplug app is lagging behind
|
|
||||||
# and kill it, if it hung.
|
|
||||||
sleep $hotplug_wait
|
|
||||||
|
|
||||||
if ! kill -0 "$hotplug_pid"; then
|
|
||||||
# hotplug already finished, check for the error code.
|
|
||||||
wait "$hotplug_pid"
|
|
||||||
else
|
|
||||||
echo "Killing hotplug application"
|
|
||||||
killprocess $hotplug_pid
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
trap - SIGINT SIGTERM EXIT
|
trap - SIGINT SIGTERM EXIT
|
||||||
|
|
||||||
|
# Check timeout return code.
|
||||||
|
wait "$timeout_pid"
|
||||||
}
|
}
|
||||||
|
|
||||||
# SPDK target hotplug
|
# SPDK target hotplug
|
||||||
|
Loading…
Reference in New Issue
Block a user