A workaround for kernel deadlocks surfaced in #1275. DPDK basically offers two APIs for hotplugging all PCI devices: rte_bus_scan() and rte_bus_probe(). Scan iterates through /sys/bus/pci/devices/* and creates corresponding rte_pci_device-s, then rte_bus_probe() tries to initialize each device with the supporting driver. Previously we did scan and probe together, one after another; now we'll have an intermediate step. After scanning the bus, we'll iterate through all rte_pci_device-s and temporarily blacklist any newly detected devices. We'll use the devargs->data field to store a timeout value (integer) after which the device can be un-blacklisted and initialized. devargs->data is documented in DPDK as "Device string storage" and it's a char*, but it's not referenced anywhere in DPDK. rte_bus_probe() respects the blacklist and does absolutely nothing with blacklisted devices. The timeout value is 2 seconds, which should be plenty of time for an NVMe device to reset, leave the critical lock sections in the kernel, and let us initialize it safely. Note that direct attach by BDF doesn't respect the blacklist, so an NVMe attach RPC won't be delayed in any way; it will continue to work as it always did. Only the automatic discovery & enumeration is deferred. Change-Id: I62b719271bd0755bc2882331ea33f69897b1e5e5 Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1733 Community-CI: Mellanox Build Bot Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
135 lines
3.0 KiB
Bash
Executable File
135 lines
3.0 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
# Resolve the directory containing this script and the repository root two
# levels above it, then source the common autotest script from the repo.
# Every expansion is quoted so that a checkout path containing whitespace or
# glob characters is not split or expanded.
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../..")
source "$rootdir/test/common/autotest_common.sh"

# DEPENDENCY_DIR is the directory in which this script looks up the VM images
# and keeps the QEMU pidfile; the test cannot run without it.
if [ -z "${DEPENDENCY_DIR}" ]; then
	echo "DEPENDENCY_DIR not defined!"
	exit 1
fi
|
|
|
|
# Run a command inside the guest VM over SSH.
# Globals:   password (read) - password handed to sshpass for the root login.
# Arguments: "$@" - command (and arguments) forwarded to ssh unchanged.
# Connects as root to localhost port 10022 with public-key authentication
# turned off, host-key checking disabled and no known_hosts file.
# The ssh invocation is bracketed by xtrace_disable / xtrace_restore.
function ssh_vm() {
	xtrace_disable
	local ssh_opts=(
		-o PubkeyAuthentication=no
		-o StrictHostKeyChecking=no
		-o UserKnownHostsFile=/dev/null
		-p 10022
	)
	sshpass -p "$password" ssh "${ssh_opts[@]}" root@localhost "$@"
	xtrace_restore
}
|
|
|
|
# Send a command line to the QEMU monitor socket on localhost:4444 and print
# what comes back.
# Arguments: "$@" - the monitor command, echoed as a single line.
# Outputs:   the reply minus its first line (tail --lines=+2) and minus any
#            line beginning with the "(qemu) " prompt.
# The "|| true" keeps the last stage from reporting failure when grep has
# nothing left to print.
function monitor_cmd() {
	echo "$@" \
		| nc localhost 4444 \
		| tail --lines=+2 \
		| { grep -v '^(qemu) ' || true; }
}
|
|
|
|
# Print how many lspci lines in the guest contain "NVM".
# The whole pipeline (lspci piped into grep -c) is passed to ssh_vm as one
# string, so both commands run on the guest side.
function get_online_devices_count() {
	local guest_cmd="lspci | grep -c NVM"

	ssh_vm "$guest_cmd"
}
|
|
|
|
# Poll the guest until it reports the expected number of NVMe devices.
# Arguments:
#   $1 - device count to wait for (default: 4)
#   $2 - timeout in seconds (default: 120)
# Outputs: one progress line on stdout per unsuccessful poll; a diagnostic on
#          stderr if the timeout expires.
# Returns: 0 once the reported count matches, 1 on timeout.
function wait_for_devices_ready() {
	local expected=${1:-4}
	local timeout=${2:-120}
	local deadline=$((SECONDS + timeout))
	local count

	while :; do
		# An empty or failed probe means "not ready yet". With the old numeric
		# test, an empty result made `[` error out and ended the loop as if
		# the devices had shown up.
		count=$(get_online_devices_count) || true
		if [[ "$count" == "$expected" ]]; then
			return 0
		fi

		if ((SECONDS >= deadline)); then
			echo "timed out after ${timeout}s waiting for $expected devices (last count: '$count')" >&2
			return 1
		fi

		echo "waitting for all devices online"
		# Pause between polls instead of hammering the guest with ssh sessions.
		sleep 1
	done
}
|
|
|
|
# Hot-add four NVMe devices (nvme0..nvme3, backed by drive0..drive3) through
# the QEMU monitor, wait until the guest sees them, then run scripts/setup.sh
# inside the guest.
function insert_devices() {
	# Keep the loop index local so the caller's $i is not clobbered.
	local i

	for i in {0..3}; do
		monitor_cmd "device_add nvme,drive=drive$i,id=nvme$i,serial=nvme$i"
	done
	wait_for_devices_ready
	ssh_vm "scripts/setup.sh"
}
|
|
|
|
# Ask the QEMU monitor to hot-remove the four NVMe devices nvme0..nvme3.
# Only the device_del requests are issued here; nothing waits for the guest
# to finish the removal.
function remove_devices() {
	# Keep the loop index local so the caller's $i is not clobbered.
	local i

	for i in {0..3}; do
		monitor_cmd "device_del nvme$i"
	done
}
|
|
|
|
# Delete the four backing image files nvme0.img..nvme3.img from
# $SPDK_TEST_STORAGE.
# Globals: SPDK_TEST_STORAGE (read) - directory holding the image files.
function devices_delete() {
	# Keep the loop index local so the caller's $i is not clobbered.
	local i

	for i in {0..3}; do
		rm "$SPDK_TEST_STORAGE/nvme$i.img"
	done
}
|
|
|
|
# First command-line argument: the password ssh_vm uses to log in to the guest.
password=$1

# Files kept under DEPENDENCY_DIR: the base VM image, the per-run test image
# created from it, and the file in which QEMU records its PID.
base_img="${DEPENDENCY_DIR}/fedora-hotplug.qcow2"
test_img="${DEPENDENCY_DIR}/fedora-hotplug-test.qcow2"
qemu_pidfile="${DEPENDENCY_DIR}/qemupid"

# No base image means there is no VM to boot: report it and exit with
# success so the test is skipped rather than failed.
[[ -e "$base_img" ]] || {
	echo "Hotplug VM image not found; skipping test"
	exit 0
}
|
|
|
|
timing_enter start_qemu

# Create the test image as a qcow2 overlay on top of the base image, so the
# guest's writes go to the test image.
# The backing format is given explicitly with -F: QEMU 6.1 and later refuse to
# create an image with a backing file when the format is omitted.
# NOTE(review): assumes the base image really is qcow2, as its file name
# suggests - confirm.
qemu-img create -b "$base_img" -F qcow2 -f qcow2 "$test_img"

# One 1 GiB zero-filled raw file per NVMe drive.
for i in {0..3}; do
	dd if=/dev/zero of="$SPDK_TEST_STORAGE/nvme$i.img" bs=1M count=1024
done

# Boot the VM in the background:
#  - the PID is written to $qemu_pidfile, which the cleanup code reads back;
#  - host TCP port 10022 is forwarded to guest port 22 (used by ssh_vm);
#  - the monitor listens on localhost:4444 in readline mode (used by
#    monitor_cmd);
#  - the four raw images are declared as drives with if=none, so no device is
#    attached to them at boot; insert_devices hot-adds NVMe devices that
#    reference drive0..drive3.
qemu-system-x86_64 \
	-daemonize -display none -m 8192 \
	-pidfile "$qemu_pidfile" \
	-hda "$test_img" \
	-net user,hostfwd=tcp::10022-:22 \
	-net nic \
	-cpu host \
	-smp cores=16,sockets=1 \
	--enable-kvm \
	-chardev socket,id=mon0,host=localhost,port=4444,server,nowait \
	-mon chardev=mon0,mode=readline \
	-drive format=raw,file="$SPDK_TEST_STORAGE/nvme0.img",if=none,id=drive0 \
	-drive format=raw,file="$SPDK_TEST_STORAGE/nvme1.img",if=none,id=drive1 \
	-drive format=raw,file="$SPDK_TEST_STORAGE/nvme2.img",if=none,id=drive2 \
	-drive format=raw,file="$SPDK_TEST_STORAGE/nvme3.img",if=none,id=drive3

timing_exit start_qemu
|
|
|
|
# Run a trivial command in the guest as a first SSH round trip.
timing_enter wait_for_vm
ssh_vm 'echo ready'
timing_exit wait_for_vm

# Stream the parts of the source tree the guest needs as a tar archive over
# the SSH connection and unpack it there.
timing_enter copy_repo
# Paths relative to $rootdir. They are kept in an array so each path reaches
# tar as exactly one argument, without relying on unquoted word splitting.
files_to_copy=(
	scripts
	include/spdk/pci_ids.h
	build/examples/hotplug
	build/lib
	dpdk/build/lib
)
(
	# Archiving from the wrong directory would silently ship the wrong files.
	cd "$rootdir" || exit 1
	tar -cf - "${files_to_copy[@]}"
) | (ssh_vm "tar -xf -")
timing_exit copy_repo
|
|
|
|
# Attach the four NVMe devices before the example is started.
insert_devices

timing_enter hotplug_test

# Start the hotplug example inside the guest as a background job.
# $LD_LIBRARY_PATH sits inside double quotes, so it is expanded here on the
# host before the command string is sent to the guest.
guest_lib_path="/root//build/lib:/root/dpdk/build/lib:$LD_LIBRARY_PATH"
guest_hotplug_cmd="build/examples/hotplug -i 0 -t 25 -n 4 -r 8"
ssh_vm "LD_LIBRARY_PATH=$guest_lib_path $guest_hotplug_cmd" &
example_pid=$!

# Remove, re-insert and remove the devices again while the example runs.
# The sleeps space the steps out; their order and lengths are kept exactly.
sleep 6
remove_devices
sleep 4
insert_devices
sleep 6
remove_devices
devices_delete

# Collect the exit status of the backgrounded example.
timing_enter wait_for_example
wait "$example_pid"
timing_exit wait_for_example
|
|
|
|
# Reset the SIGINT/SIGTERM/EXIT traps to their defaults for the remaining
# teardown steps.
trap - SIGINT SIGTERM EXIT

# Read the PID that QEMU wrote at startup. Reading the file directly avoids
# passing its content to awk as a printf format string.
qemupid=$(< "$qemu_pidfile")
kill -9 "$qemupid"
rm "$qemu_pidfile"
rm "$test_img"

timing_exit hotplug_test
|