Spdk/test/nvme/hotplug.sh
Darek Stojaczyk 814072fa4e env_dpdk/pci: delay device initialization on hotplug
A workaround for kernel deadlocks surfaced in #1275.

DPDK basically offers two APIs for hotplugging all PCI devices:
rte_bus_scan() and rte_bus_probe(). Scan iterates through
/sys/bus/pci/devices/* and creates corresponding rte_pci_device-s,
then rte_bus_probe() tries to initialize each device with the
supporting driver.

Previously we did scan and probe together, one after another, now
we'll have an intermediate step. After scanning the bus, we'll
iterate through all rte_pci_device-s and temporarily blacklist any
newly detected devices. We'll use devargs->data field to a store
a timeout value (integer) after which the device can be un-blacklisted
and initialized. devargs->data is documented in DPDK as "Device
string storage" and it's a char*, but it's not referenced anywhere
in DPDK. rte_bus_probe() respects the blacklist and doesn't do
absolutely anything with blacklisted ones.

The timeout value is 2 seconds, which should be plenty enough
for an NVMe device to reset, leave the critical lock sections in
kernel, and let us initialize it safely.

Note that direct attach by BDF doesn't respect the blacklist,
so an NVMe attach RPC won't be delayed in any way, it will continue
to work as it always did. Only the automatic discovery & enumeration
is deferred.

Change-Id: I62b719271bd0755bc2882331ea33f69897b1e5e5
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1733
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2020-07-23 20:48:47 +00:00

135 lines
3.0 KiB
Bash
Executable File

#!/usr/bin/env bash
testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../..)
source $rootdir/test/common/autotest_common.sh
if [ -z "${DEPENDENCY_DIR}" ]; then
echo DEPENDENCY_DIR not defined!
exit 1
fi
function ssh_vm() {
xtrace_disable
sshpass -p "$password" ssh -o PubkeyAuthentication=no \
-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 10022 root@localhost "$@"
xtrace_restore
}
function monitor_cmd() {
echo "$@" | nc localhost 4444 | tail --lines=+2 | (grep -v '^(qemu) ' || true)
}
function get_online_devices_count() {
ssh_vm "lspci | grep -c NVM"
}
function wait_for_devices_ready() {
count=$(get_online_devices_count)
while [ $count -ne 4 ]; do
echo "waitting for all devices online"
count=$(get_online_devices_count)
done
}
function insert_devices() {
for i in {0..3}; do
monitor_cmd "device_add nvme,drive=drive$i,id=nvme$i,serial=nvme$i"
done
wait_for_devices_ready
ssh_vm "scripts/setup.sh"
}
function remove_devices() {
for i in {0..3}; do
monitor_cmd "device_del nvme$i"
done
}
function devices_delete() {
for i in {0..3}; do
rm "$SPDK_TEST_STORAGE/nvme$i.img"
done
}
password=$1
base_img=${DEPENDENCY_DIR}/fedora-hotplug.qcow2
test_img=${DEPENDENCY_DIR}/fedora-hotplug-test.qcow2
qemu_pidfile=${DEPENDENCY_DIR}/qemupid
if [ ! -e "$base_img" ]; then
echo "Hotplug VM image not found; skipping test"
exit 0
fi
timing_enter start_qemu
qemu-img create -b "$base_img" -f qcow2 "$test_img"
for i in {0..3}; do
dd if=/dev/zero of="$SPDK_TEST_STORAGE/nvme$i.img" bs=1M count=1024
done
qemu-system-x86_64 \
-daemonize -display none -m 8192 \
-pidfile "$qemu_pidfile" \
-hda "$test_img" \
-net user,hostfwd=tcp::10022-:22 \
-net nic \
-cpu host \
-smp cores=16,sockets=1 \
--enable-kvm \
-chardev socket,id=mon0,host=localhost,port=4444,server,nowait \
-mon chardev=mon0,mode=readline \
-drive format=raw,file="$SPDK_TEST_STORAGE/nvme0.img",if=none,id=drive0 \
-drive format=raw,file="$SPDK_TEST_STORAGE/nvme1.img",if=none,id=drive1 \
-drive format=raw,file="$SPDK_TEST_STORAGE/nvme2.img",if=none,id=drive2 \
-drive format=raw,file="$SPDK_TEST_STORAGE/nvme3.img",if=none,id=drive3
timing_exit start_qemu
timing_enter wait_for_vm
ssh_vm 'echo ready'
timing_exit wait_for_vm
timing_enter copy_repo
files_to_copy="scripts "
files_to_copy+="include/spdk/pci_ids.h "
files_to_copy+="build/examples/hotplug "
files_to_copy+="build/lib "
files_to_copy+="dpdk/build/lib "
(
cd "$rootdir"
tar -cf - $files_to_copy
) | (ssh_vm "tar -xf -")
timing_exit copy_repo
insert_devices
timing_enter hotplug_test
ssh_vm "LD_LIBRARY_PATH=/root//build/lib:/root/dpdk/build/lib:$LD_LIBRARY_PATH build/examples/hotplug -i 0 -t 25 -n 4 -r 8" &
example_pid=$!
sleep 6
remove_devices
sleep 4
insert_devices
sleep 6
remove_devices
devices_delete
timing_enter wait_for_example
wait $example_pid
timing_exit wait_for_example
trap - SIGINT SIGTERM EXIT
qemupid=$(awk '{printf $0}' "$qemu_pidfile")
kill -9 $qemupid
rm "$qemu_pidfile"
rm "$test_img"
timing_exit hotplug_test