test/vhost: live migration test case 3
To be run manually only, as the test pool does not have DUTs connected in pairs.

Change-Id: If3ab3e671156b583adb35d5b23ee90003a6de732
Signed-off-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-on: https://review.gerrithub.io/400988
Reviewed-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>

@@ -220,8 +220,9 @@ function spdk_vhost_kill()
            rm $vhost_pid_file
            rc=1
        else
            #check vhost return code, activate trap on error
            wait $vhost_pid
            while kill -0 $vhost_pid; do
                echo "."
            done
        fi
    elif /bin/kill -0 $vhost_pid; then
        error "vhost NOT killed - you need to kill it manually"

@@ -294,9 +295,12 @@ function vm_create_ssh_config()
        echo " UserKnownHostsFile=/dev/null"
        echo " StrictHostKeyChecking=no"
        echo " User root"
        echo " ControlPath=$VM_BASE_DIR/%r@%h:%p.ssh"
        echo " ControlPath=/tmp/%r@%h:%p.ssh"
        echo ""
        ) > $ssh_config
        # Control path created at /tmp because of live migration test case 3.
        # In case of using sshfs share for the test - control path cannot be
        # on share because remote server will fail on ssh commands.
    fi
}
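For reference, the relevant part of the generated ssh config now looks like the lines below (only the options visible in this hunk are shown), so the ControlMaster socket is created under /tmp rather than on the sshfs-shared $VM_BASE_DIR used by test case 3:

     UserKnownHostsFile=/dev/null
     StrictHostKeyChecking=no
     User root
     ControlPath=/tmp/%r@%h:%p.ssh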

test/vhost/migration/autotest.config (new file, 14 lines)
@@ -0,0 +1,14 @@
vhost_0_reactor_mask=0x1
vhost_0_master_core=0

vhost_1_reactor_mask=0x1
vhost_1_master_core=0

VM_0_qemu_mask=0x1
VM_0_qemu_numa_node=0

VM_1_qemu_mask=0x1
VM_1_qemu_numa_node=0

VM_2_qemu_mask=0x1
VM_2_qemu_numa_node=0

test/vhost/migration/migration-tc3.job (new file, 20 lines)
@@ -0,0 +1,20 @@
[global]
blocksize=4k-512k
iodepth=128
ioengine=libaio
filename=
group_reporting
thread
numjobs=1
direct=1
do_verify=1
verify=md5
verify_fatal=1
verify_dump=1
verify_backlog=8

[randwrite]
rw=randwrite
runtime=15
time_based
stonewall

test/vhost/migration/migration-tc3a.sh (new file, 206 lines)
@@ -0,0 +1,206 @@
source $SPDK_BUILD_DIR/test/nvmf/common.sh
source $BASE_DIR/autotest.config

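# Management (MGMT_*) addresses are used for the ssh control traffic between
# the two hosts; the RDMA_* addresses carry the NVMe-oF traffic and the QEMU
# migration stream.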
MGMT_TARGET_IP="10.102.17.181"
MGMT_INITIATOR_IP="10.102.17.180"
RDMA_TARGET_IP="10.0.0.1"
RDMA_INITIATOR_IP="10.0.0.2"
incoming_vm=1
target_vm=2
incoming_vm_ctrlr=naa.VhostScsi0.$incoming_vm
target_vm_ctrlr=naa.VhostScsi0.$target_vm
share_dir=$TEST_DIR/share
job_file=$BASE_DIR/migration-tc3.job

function ssh_remote()
{
    local ssh_cmd="ssh -i $SPDK_VHOST_SSH_KEY_FILE \
        -o UserKnownHostsFile=/dev/null \
        -o StrictHostKeyChecking=no -o ControlMaster=auto \
        root@$1"

    shift
    $ssh_cmd "$@"
}

function wait_for_remote()
{
    local timeout=40
    set +x
    while [[ ! -f $share_dir/DONE ]]; do
        echo -n "."
        if (( timeout-- == 0 )); then
            error "timeout while waiting for FIO!"
        fi
        sleep 1
    done
    set -x
    rm -f $share_dir/DONE
}

function check_rdma_connection()
{
    local nic_name=$(ip -4 -o addr show to $RDMA_TARGET_IP up | cut -d' ' -f2)
    if [[ -z $nic_name ]]; then
        error "There is no NIC with IP address $RDMA_TARGET_IP configured"
    fi

    if ! ls /sys/class/infiniband/*/device/net/$nic_name &> /dev/null; then
        error "$nic_name with IP $RDMA_TARGET_IP is not a RDMA capable NIC"
    fi

}

function host1_cleanup_nvmf()
{
    notice "Shutting down nvmf_tgt on local server"
    if [[ ! -z "$1" ]]; then
        pkill --signal $1 -F $nvmf_dir/nvmf_tgt.pid
    else
        pkill -F $nvmf_dir/nvmf_tgt.pid
    fi
    rm -f $nvmf_dir/nvmf_tgt.pid
}

function host1_cleanup_vhost()
{
    trap 'host1_cleanup_nvmf SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
    notice "Shutting down VM $incoming_vm"
    vm_kill $incoming_vm

    notice "Removing bdev & controller from vhost on local server"
    $rpc_0 delete_bdev Nvme0n1
    $rpc_0 remove_vhost_controller $incoming_vm_ctrlr

    notice "Shutting down vhost app"
    spdk_vhost_kill 0

    host1_cleanup_nvmf
}

function host1_start_nvmf()
{
    nvmf_dir="$TEST_DIR/nvmf_tgt"
    rpc_nvmf="python $SPDK_BUILD_DIR/scripts/rpc.py -s $nvmf_dir/nvmf_rpc.sock"

    notice "Starting nvmf_tgt instance on local server"
    mkdir -p $nvmf_dir
    rm -rf $nvmf_dir/*

    cp $SPDK_BUILD_DIR/test/nvmf/nvmf.conf $nvmf_dir/nvmf.conf
    $SPDK_BUILD_DIR/scripts/gen_nvme.sh >> $nvmf_dir/nvmf.conf

    trap 'host1_cleanup_nvmf SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
    $SPDK_BUILD_DIR/app/nvmf_tgt/nvmf_tgt -s 512 -c $nvmf_dir/nvmf.conf -r $nvmf_dir/nvmf_rpc.sock &
    nvmf_tgt_pid=$!
    echo $nvmf_tgt_pid > $nvmf_dir/nvmf_tgt.pid
    waitforlisten "$nvmf_tgt_pid" "$nvmf_dir/nvmf_rpc.sock"

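    # Expose the local NVMe bdev over NVMe-oF RDMA; both the local vhost (below)
    # and the vhost started on the remote server by migration-tc3b.sh attach to
    # this same subsystem, so the migrated VM keeps seeing the same backing disk.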
    $rpc_nvmf construct_nvmf_subsystem nqn.2018-02.io.spdk:cnode1 \
        "trtype:RDMA traddr:$RDMA_TARGET_IP trsvcid:4420" "" -a -s SPDK01 -n Nvme0n1
}

function host1_start_vhost()
{
    rpc_0="python $SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"

    notice "Starting vhost0 instance on local server"
    trap 'host1_cleanup_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
    spdk_vhost_run --conf-path=$BASE_DIR --vhost-num=0
    $rpc_0 construct_nvme_bdev -b Nvme0 -t rdma -f ipv4 -a $RDMA_TARGET_IP -s 4420 -n "nqn.2018-02.io.spdk:cnode1"
    $rpc_0 construct_vhost_scsi_controller $incoming_vm_ctrlr
    $rpc_0 add_vhost_scsi_lun $incoming_vm_ctrlr 0 Nvme0n1

    vm_setup --os="$share_dir/migration.qcow2" --force=$incoming_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \
        --migrate-to=$target_vm --memory=512 --queue_num=1

    # TODO: Fix loop calculating cpu_num in common.sh
    # We need -smp 1 and -queue_num 1 for this test to work, and this loop
    # in some cases calculates wrong cpu_num.
    sed -i "s#smp 2#smp 1#g" $VM_BASE_DIR/$incoming_vm/run.sh
    vm_run $incoming_vm
    vm_wait_for_boot 300 $incoming_vm
}

function cleanup_share()
{
    set +e
    notice "Cleaning up share directory on remote and local server"
    ssh_remote $MGMT_INITIATOR_IP "umount $VM_BASE_DIR"
    ssh_remote $MGMT_INITIATOR_IP "umount $share_dir; rm -f $share_dir/*"
    rm -f $share_dir/migration.qcow2
    rm -f $share_dir/spdk.tar.gz
    set -e
}

function host_1_create_share()
{
    notice "Creating share directory on local server to re-use on remote"
    mkdir -p $share_dir
    mkdir -p $VM_BASE_DIR # This dir would've been created later but we need it now
    rm -rf $share_dir/spdk.tar.gz $share_dir/spdk || true
    cp $os_image $share_dir/migration.qcow2
    tar --exclude="*.o" --exclude="*.d" --exclude="*.git" -C $SPDK_BUILD_DIR -zcf $share_dir/spdk.tar.gz .
}

function host_2_create_share()
{
    # Copy & compile the sources for later use on remote server.
    ssh_remote $MGMT_INITIATOR_IP "uname -a"
    ssh_remote $MGMT_INITIATOR_IP "mkdir -p $share_dir"
    ssh_remote $MGMT_INITIATOR_IP "mkdir -p $VM_BASE_DIR"
    ssh_remote $MGMT_INITIATOR_IP "sshfs -o ssh_command=\"ssh -i $SPDK_VHOST_SSH_KEY_FILE\" root@$MGMT_TARGET_IP:$VM_BASE_DIR $VM_BASE_DIR"
    ssh_remote $MGMT_INITIATOR_IP "sshfs -o ssh_command=\"ssh -i $SPDK_VHOST_SSH_KEY_FILE\" root@$MGMT_TARGET_IP:$share_dir $share_dir"
    ssh_remote $MGMT_INITIATOR_IP "mkdir -p $share_dir/spdk"
    ssh_remote $MGMT_INITIATOR_IP "tar -zxf $share_dir/spdk.tar.gz -C $share_dir/spdk --strip-components=1"
    ssh_remote $MGMT_INITIATOR_IP "cd $share_dir/spdk; make clean; ./configure --with-rdma --enable-debug; make -j40"
}

function host_2_start_vhost()
{
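    # Launch test case 3b on the remote server in the background. That script
    # brings up vhost 1 and the target VM there, then suspends itself until
    # this host resumes it after the migration.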
ssh_remote $MGMT_INITIATOR_IP "nohup $share_dir/spdk/test/vhost/migration/migration.sh --test-cases=3b --work-dir=$TEST_DIR --os=$share_dir/migration.qcow2 &>$share_dir/output.log &"
|
||||
notice "Waiting for remote to be done with vhost & VM setup..."
|
||||
wait_for_remote
|
||||
}
|
||||
|
||||
function setup_share()
|
||||
{
|
||||
trap 'cleanup_share; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
|
||||
host_1_create_share
|
||||
host_2_create_share
|
||||
}
|
||||
|
||||
function migration_tc3()
|
||||
{
|
||||
check_rdma_connection
|
||||
setup_share
|
||||
host1_start_nvmf
|
||||
host1_start_vhost
|
||||
host_2_start_vhost
|
||||
|
||||
# Do migration
|
||||
notice "Starting fio on local VM"
|
||||
vm_check_scsi_location $incoming_vm
|
||||
|
||||
run_fio $fio_bin --job-file="$job_file" --local --vm="${incoming_vm}$(printf ':/dev/%s' $SCSI_DISK)"
|
||||
sleep 5
|
||||
|
||||
if ! is_fio_running $incoming_vm; then
|
||||
        vm_ssh $incoming_vm "cat /root/$(basename ${job_file}).out"
        error "Fio not running on local VM before starting migration!"
    fi

    vm_migrate $incoming_vm $RDMA_INITIATOR_IP
    sleep 1

    # Verify migration on remote host and clean up vhost
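    # Resume the suspended tc3b script on the remote server with SIGCONT;
    # it then checks that fio is still running in the migrated VM and cleans up.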
ssh_remote $MGMT_INITIATOR_IP "pkill -CONT -F $TEST_DIR/tc3b.pid"
|
||||
notice "Waiting for remote to finish FIO on VM and clean up..."
|
||||
wait_for_remote
|
||||
|
||||
# Clean up local stuff
|
||||
host1_cleanup_vhost
|
||||
cleanup_share
|
||||
}
|
||||
|
||||
migration_tc3
|

test/vhost/migration/migration-tc3b.sh (new executable file, 80 lines)
@@ -0,0 +1,80 @@
# The -m option is needed to be able to use the "suspend" command,
# as we are using a non-interactive session to connect to the remote server.
# Without -m it would not be possible to suspend the process.
set -m
source $BASE_DIR/autotest.config

RDMA_TARGET_IP="10.0.0.1"
incoming_vm=1
target_vm=2
target_vm_ctrl=naa.VhostScsi0.$target_vm
rpc="python $SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir 1)/rpc.sock"
share_dir=$TEST_DIR/share

function host_2_cleanup_vhost()
{
    notice "Shutting down VM $target_vm"
    vm_kill $target_vm

    notice "Removing bdev & controller from vhost 1 on remote server"
    $rpc delete_bdev Nvme0n1
    $rpc remove_vhost_controller $target_vm_ctrl

    notice "Shutting down vhost app"
    spdk_vhost_kill 1
    sleep 1
}

function host_2_start_vhost()
{
    echo "BASE DIR $TEST_DIR"
    vhost_work_dir=$TEST_DIR/vhost1
    mkdir -p $vhost_work_dir
    rm -f $vhost_work_dir/*

    notice "Starting vhost 1 instance on remote server"
    trap 'host_2_cleanup_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
    spdk_vhost_run --conf-path=$BASE_DIR --vhost-num=1

    $rpc construct_nvme_bdev -b Nvme0 -t rdma -f ipv4 -a $RDMA_TARGET_IP -s 4420 -n "nqn.2018-02.io.spdk:cnode1"
    $rpc construct_vhost_scsi_controller $target_vm_ctrl
    $rpc add_vhost_scsi_lun $target_vm_ctrl 0 Nvme0n1

    vm_setup --os="$os_image" --force=$target_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \
        --memory=512 --vhost-num=1 --incoming=$incoming_vm
    vm_run $target_vm
    sleep 1

    # Use this file as a flag to notify main script
    # that setup on remote server is done
    echo "DONE" > $share_dir/DONE
}

echo $$ > $TEST_DIR/tc3b.pid
host_2_start_vhost
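# Suspend this script here; migration-tc3a.sh resumes it with SIGCONT
# (pkill -CONT -F tc3b.pid) once the live migration has finished.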
suspend -f

if ! vm_os_booted $target_vm; then
    fail "VM$target_vm is not running!"
fi

if ! is_fio_running $target_vm; then
    vm_ssh $target_vm "cat /root/migration-tc3.job.out"
    error "FIO is not running on remote server after migration!"
fi

notice "Waiting for FIO to finish on remote server VM"
timeout=40
while is_fio_running $target_vm; do
    sleep 1
    echo -n "."
    if (( timeout-- == 0 )); then
        error "timeout while waiting for FIO!"
    fi
done

notice "FIO result after migration:"
vm_ssh $target_vm "cat /root/migration-tc3.job.out"

host_2_cleanup_vhost
echo "DONE" > $share_dir/DONE

@@ -70,6 +70,11 @@ function vm_migrate()
    local target_vm_dir="$(readlink -e $from_vm_dir/vm_migrate_to)"
    local target_vm="$(basename $target_vm_dir)"
    local target_vm_migration_port="$(cat $target_vm_dir/migration_port)"
    if [[ -n "$2" ]]; then
        local target_ip=$2
    else
        local target_ip="127.0.0.1"
    fi

    # Sanity check if target VM (QEMU) is configured to accept source VM (QEMU) migration
    if [[ "$(readlink -e ${target_vm_dir}/vm_incoming)" != "$(readlink -e ${from_vm_dir})" ]]; then

@@ -80,7 +85,7 @@ function vm_migrate()
    notice "Migrating VM $1 to VM "$(basename $target_vm_dir)
    echo -e \
        "migrate_set_speed 1g\n" \
        "migrate tcp:127.0.0.1:$target_vm_migration_port\n" \
        "migrate tcp:$target_ip:$target_vm_migration_port\n" \
        "info migrate\n" \
        "quit" | vm_monitor_send $1 "$from_vm_dir/migration_result"

@@ -90,10 +95,15 @@ function vm_migrate()
        fail "Migration failed:\n"
    fi

    # Don't perform the following check if target VM is on remote server
    # as we won't have access to it.
    # If you need this check then perform it on your own.
    if [[ "$target_ip" == "127.0.0.1" ]]; then
        if ! vm_os_booted $target_vm; then
            fail "VM$target_vm is not running"
            cat $target_vm $target_vm_dir/cont_result
        fi
    fi

    notice "Migration complete"
    timing_exit vm_migrate
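With this change vm_migrate takes an optional second argument naming the target host; when it is omitted the previous behaviour, migration to the paired local QEMU over 127.0.0.1, is kept. A minimal usage sketch, with the remote form taken from migration-tc3a.sh above and the local form assumed for the existing test cases:

    # local migration to the paired VM on the same host (default target 127.0.0.1)
    vm_migrate $incoming_vm

    # cross-host migration over the RDMA network, as done in test case 3
    vm_migrate $incoming_vm $RDMA_INITIATOR_IP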

@@ -114,9 +124,6 @@ function is_fio_running()
    return $ret
}

# FIXME: this should not be needed
vm_kill_all

for test_case in ${test_cases//,/ }; do
    assert_number "$test_case"
    notice "==============================="