- Add option to throttle iops in VMs using cgroups - Add option to measure CPU utilization in VMs using SAR - Add option to limit kernel vhost CPU cores (not NUMA optimized) - Add option to do lvol preconditioning using fio bdev plugin before running IO performance test Change-Id: I7e0fcf977be96ecf837385c2abc9d5dabbe2f8c5 Signed-off-by: Karol Latecki <karol.latecki@intel.com> Reviewed-on: https://review.gerrithub.io/c/434229 Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Paweł Niedźwiecki <pawelx.niedzwiecki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
385 lines
12 KiB
Bash
Executable File
385 lines
12 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
set -e
|
|
|
|
vm_count=1
|
|
vm_memory=2048
|
|
vm_image="/home/sys_sgsw/vhost_vm_image.qcow2"
|
|
vm_sar_enable=false
|
|
vm_sar_delay="0"
|
|
vm_sar_interval="1"
|
|
vm_sar_count="10"
|
|
vm_throttle=""
|
|
max_disks=""
|
|
ctrl_type="spdk_vhost_scsi"
|
|
use_split=false
|
|
kernel_cpus=""
|
|
lvol_precondition=false
|
|
lvol_stores=()
|
|
lvol_bdevs=()
|
|
used_vms=""
|
|
wwpn_prefix="naa.5001405bc6498"
|
|
|
|
fio_bin="--fio-bin=/home/sys_sgsw/fio_ubuntu"
|
|
precond_fio_bin="/usr/src/fio/fio"
|
|
|
|
function usage()
|
|
{
|
|
[[ ! -z $2 ]] && ( echo "$2"; echo ""; )
|
|
echo "Shortcut script for doing automated test"
|
|
echo "Usage: $(basename $1) [OPTIONS]"
|
|
echo
|
|
echo "-h, --help Print help and exit"
|
|
echo " --fio-bin=PATH Path to FIO binary on host.;"
|
|
echo " Binary will be copied to VM, static compilation"
|
|
echo " of binary is recommended."
|
|
echo " --fio-job=PATH Fio config to use for test."
|
|
echo " --vm-count=INT Total number of virtual machines to launch in this test;"
|
|
echo " Each VM will get one bdev (lvol or split vbdev)"
|
|
echo " to run FIO test."
|
|
echo " Default: 1"
|
|
echo " --vm-memory=INT Amount of RAM memory (in MB) to pass to a single VM."
|
|
echo " Default: 2048 MB"
|
|
echo " --vm-image=PATH OS image to use for running the VMs."
|
|
echo " Default: /home/sys_sgsw/vhost_vm_image.qcow2"
|
|
echo " --vm-sar-enable Measure CPU utilization on VM using sar."
|
|
echo " --vm-sar-delay=INT Wait for X seconds before sarting SAR measurement on VMs. Default: 0."
|
|
echo " --vm-sar-interval=INT Interval (seconds) argument for SAR. Default: 1s."
|
|
echo " --vm-sar-count=INT Count argument for SAR. Default: 10."
|
|
echo " --vm-throttle-iops=INT I/Os throttle rate in IOPS for each device on the VMs."
|
|
echo " --max-disks=INT Maximum number of NVMe drives to use in test."
|
|
echo " Default: will use all available NVMes."
|
|
echo " --ctrl-type=TYPE Controller type to use for test:"
|
|
echo " spdk_vhost_scsi - use spdk vhost scsi"
|
|
echo " spdk_vhost_blk - use spdk vhost block"
|
|
echo " kernel_vhost - use kernel vhost scsi"
|
|
echo " Default: spdk_vhost_scsi"
|
|
echo " --use-split Use split vbdevs instead of Logical Volumes"
|
|
echo " --limit-kernel-vhost=INT Limit kernel vhost to run only on a number of CPU cores."
|
|
echo " --lvol-precondition Precondition lvols after creating. Default: true."
|
|
echo " --precond-fio-bin FIO binary used for SPDK fio plugin precondition. Default: /usr/src/fio/fio."
|
|
echo " --custom-cpu-cfg=PATH Custom CPU config for test."
|
|
echo " Default: spdk/test/vhost/common/autotest.config"
|
|
echo "-x set -x for script debug"
|
|
exit 0
|
|
}
|
|
|
|
function cleanup_lvol_cfg()
|
|
{
|
|
notice "Removing lvol bdevs"
|
|
for lvol_bdev in "${lvol_bdevs[@]}"; do
|
|
$rpc_py destroy_lvol_bdev $lvol_bdev
|
|
notice "lvol bdev $lvol_bdev removed"
|
|
done
|
|
|
|
notice "Removing lvol stores"
|
|
for lvol_store in "${lvol_stores[@]}"; do
|
|
$rpc_py destroy_lvol_store -u $lvol_store
|
|
notice "lvol store $lvol_store removed"
|
|
done
|
|
}
|
|
|
|
function cleanup_split_cfg()
|
|
{
|
|
notice "Removing split vbdevs"
|
|
for (( i=0; i<$max_disks; i++ ));do
|
|
$rpc_py destruct_split_vbdev Nvme${i}n1
|
|
done
|
|
}
|
|
|
|
function cleanup_parted_config()
|
|
{
|
|
local disks=$(ls /dev/nvme*n1 | sort --version-sort)
|
|
for disk in $disks; do
|
|
parted -s $disk rm 1
|
|
done
|
|
}
|
|
|
|
function cleanup_kernel_vhost()
|
|
{
|
|
notice "Cleaning kernel vhost configration"
|
|
targetcli clearconfig confirm=True
|
|
cleanup_parted_config
|
|
}
|
|
|
|
while getopts 'xh-:' optchar; do
|
|
case "$optchar" in
|
|
-)
|
|
case "$OPTARG" in
|
|
help) usage $0 ;;
|
|
fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
|
|
fio-job=*) fio_job="${OPTARG#*=}" ;;
|
|
vm-count=*) vm_count="${OPTARG#*=}" ;;
|
|
vm-memory=*) vm_memory="${OPTARG#*=}" ;;
|
|
vm-image=*) vm_image="${OPTARG#*=}" ;;
|
|
vm-sar-enable) vm_sar_enable=true ;;
|
|
vm-sar-delay=*) vm_sar_delay="${OPTARG#*=}" ;;
|
|
vm-sar-interval=*) vm_sar_interval="${OPTARG#*=}" ;;
|
|
vm-sar-count=*) vm_sar_count="${OPTARG#*=}" ;;
|
|
vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
|
|
max-disks=*) max_disks="${OPTARG#*=}" ;;
|
|
ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
|
|
use-split) use_split=true ;;
|
|
lvol-precondition) lvol_precondition=true ;;
|
|
precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
|
|
limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
|
|
custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
|
|
*) usage $0 "Invalid argument '$OPTARG'" ;;
|
|
esac
|
|
;;
|
|
h) usage $0 ;;
|
|
x) set -x
|
|
x="-x" ;;
|
|
*) usage $0 "Invalid argument '$OPTARG'"
|
|
esac
|
|
done
|
|
|
|
. $(readlink -e "$(dirname $0)/../common/common.sh") || exit 1
|
|
. $(readlink -e "$(dirname $0)/../../../scripts/common.sh") || exit 1
|
|
BASE_DIR=$(readlink -f $(dirname ${BASH_SOURCE[0]}))
|
|
COMMON_DIR="$(cd $(readlink -f $(dirname $0))/../common && pwd)"
|
|
rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock"
|
|
|
|
if [[ -n $custom_cpu_cfg ]]; then
|
|
source $custom_cpu_cfg
|
|
fi
|
|
|
|
if [[ -z $fio_job ]]; then
|
|
warning "No FIO job specified! Will use default from common directory."
|
|
fio_job="$COMMON_DIR/fio_jobs/default_integrity.job"
|
|
fi
|
|
|
|
trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
|
|
notice "Get NVMe disks:"
|
|
nvmes=($(iter_pci_class_code 01 08 02))
|
|
|
|
if [[ -z $max_disks ]]; then
|
|
max_disks=${#nvmes[@]}
|
|
fi
|
|
|
|
if [[ ${#nvmes[@]} -lt max_disks ]]; then
|
|
fail "Number of NVMe drives (${#nvmes[@]}) is lower than number of requested disks for test ($max_disks)"
|
|
fi
|
|
|
|
|
|
# Calculate number of needed splits per NVMe
|
|
# so that each VM gets it's own bdev during test.
|
|
splits=()
|
|
|
|
if [[ $vm_count -le $max_disks ]]; then
|
|
for i in $(seq 0 $((max_disks - 1))); do
|
|
splits+=("1")
|
|
done
|
|
else
|
|
#Calculate least minimum number of splits on each disks
|
|
for i in `seq 0 $((max_disks - 1))`; do
|
|
splits+=( $((vm_count / max_disks)) )
|
|
done
|
|
# Split up the remainder
|
|
for i in `seq 0 $((vm_count % max_disks - 1))`; do
|
|
(( splits[i]++ ))
|
|
done
|
|
fi
|
|
notice "Preparing NVMe setup..."
|
|
notice "Using $max_disks physical NVMe drives"
|
|
notice "Nvme split list: ${splits[@]}"
|
|
|
|
# ===== Prepare NVMe splits & run vhost process =====
|
|
if [[ "$ctrl_type" == "kernel_vhost" ]]; then
|
|
trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
|
|
# Split disks using parted for kernel vhost
|
|
newline=$'\n'
|
|
for (( i=0; i<$max_disks; i++ ));do
|
|
parted -s /dev/nvme${i}n1 mklabel msdos
|
|
parted -s /dev/nvme${i}n1 mkpart extended 2048s 100%
|
|
part_size=$((100/${splits[$i]})) # Split 100% of disk into roughly even parts
|
|
echo " Creating ${splits[$i]} partitions of relative disk size ${part_size}"
|
|
|
|
for p in $(seq 0 $((${splits[$i]} - 1))); do
|
|
p_start=$(($p*$part_size))
|
|
p_end=$(($p_start+$part_size))
|
|
parted -s /dev/nvme${i}n1 mkpart logical ${p_start}% ${p_end}%
|
|
done
|
|
done
|
|
sleep 1
|
|
|
|
# Prepare kernel vhost configuration
|
|
# Below grep: match only NVMe partitions which are not "Extended" type.
|
|
# For example: will match nvme0n1p15 but not nvme0n1p1
|
|
partitions=$(ls -1 /dev/nvme* | sort --version-sort | grep -P 'p(?!1$)\d+')
|
|
backstores=()
|
|
|
|
# Create block backstores for vhost kernel process
|
|
for p in $partitions; do
|
|
backstore_name=$(basename $p)
|
|
backstores+=("$backstore_name")
|
|
targetcli backstores/block create $backstore_name $p
|
|
done
|
|
|
|
# Create kernel vhost controllers and add LUNs
|
|
for ((i=0; i<${#backstores[*]}; i++)); do
|
|
# WWPN prefix misses 3 characters. Need to complete it
|
|
# using block backstore number
|
|
x=$(printf %03d $i)
|
|
wwpn="${wwpn_prefix}${x}"
|
|
targetcli vhost/ create $wwpn
|
|
targetcli vhost/$wwpn/tpg1/luns create /backstores/block/${backstores[$i]}
|
|
done
|
|
else
|
|
# Run vhost process and prepare split vbdevs or lvol bdevs
|
|
notice "running SPDK vhost"
|
|
spdk_vhost_run
|
|
notice "..."
|
|
|
|
if [[ $use_split == true ]]; then
|
|
notice "Using split vbdevs"
|
|
trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
|
|
split_bdevs=()
|
|
for (( i=0; i<$max_disks; i++ ));do
|
|
out=$($rpc_py construct_split_vbdev Nvme${i}n1 ${splits[$i]})
|
|
for s in $(seq 0 $((${splits[$i]}-1))); do
|
|
split_bdevs+=("Nvme${i}n1p${s}")
|
|
done
|
|
done
|
|
bdevs=("${split_bdevs[@]}")
|
|
else
|
|
notice "Using logical volumes"
|
|
trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
|
|
for (( i=0; i<$max_disks; i++ ));do
|
|
ls_guid=$($rpc_py construct_lvol_store Nvme${i}n1 lvs_$i)
|
|
lvol_stores+=("$ls_guid")
|
|
for (( j=0; j<${splits[$i]}; j++)); do
|
|
free_mb=$(get_lvs_free_mb "$ls_guid")
|
|
size=$((free_mb / (${splits[$i]}-j) ))
|
|
lb_name=$($rpc_py construct_lvol_bdev -u $ls_guid lbd_$j $size)
|
|
lvol_bdevs+=("$lb_name")
|
|
done
|
|
done
|
|
bdevs=("${lvol_bdevs[@]}")
|
|
fi
|
|
fi
|
|
|
|
if [[ ! "$ctrl_type" == "kernel_vhost" && $lvol_precondition == true && $use_split == false ]]; then
|
|
# Need to precondition lvols due to UNMAP done after creation
|
|
# of lvol_stores. Kill vhost for now and run fio_plugin over all lvol bdevs
|
|
spdk_vhost_kill
|
|
$SPDK_BUILD_DIR/scripts/gen_nvme.sh > $SPDK_BUILD_DIR/nvme.cfg
|
|
fio_filename=$(printf ":%s" "${bdevs[@]}")
|
|
fio_filename=${fio_filename:1}
|
|
$precond_fio_bin --name="lvol_precondition" \
|
|
--ioengine="${SPDK_BUILD_DIR}/examples/bdev/fio_plugin/fio_plugin" \
|
|
--rw="write" --spdk_conf="${SPDK_BUILD_DIR}/nvme.cfg" --thread="1" \
|
|
--group_reporting --direct="1" --size="100%" --loops="2" --bs="256k" \
|
|
--filename="${fio_filename}" || true
|
|
spdk_vhost_run
|
|
fi
|
|
|
|
# Prepare VMs and controllers
|
|
for (( i=0; i<$vm_count; i++)); do
|
|
vm="vm_$i"
|
|
|
|
setup_cmd="vm_setup --disk-type=$ctrl_type --force=$i --memory=$vm_memory"
|
|
setup_cmd+=" --os=$vm_image"
|
|
|
|
if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
|
|
$rpc_py construct_vhost_scsi_controller naa.0.$i
|
|
$rpc_py add_vhost_scsi_lun naa.0.$i 0 ${bdevs[$i]}
|
|
setup_cmd+=" --disks=0"
|
|
elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
|
|
$rpc_py construct_vhost_blk_controller naa.$i.$i ${bdevs[$i]}
|
|
setup_cmd+=" --disks=$i"
|
|
elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
|
|
x=$(printf %03d $i)
|
|
setup_cmd+=" --disks=${wwpn_prefix}${x}"
|
|
fi
|
|
$setup_cmd
|
|
used_vms+=" $i"
|
|
done
|
|
|
|
# Start VMs
|
|
# Run VMs
|
|
vm_run $used_vms
|
|
vm_wait_for_boot 300 $used_vms
|
|
|
|
if [[ -n "$kernel_cpus" ]]; then
|
|
mkdir -p /sys/fs/cgroup/cpuset/spdk
|
|
kernel_mask=$vhost_0_reactor_mask
|
|
kernel_mask=${kernel_mask#"["}
|
|
kernel_mask=${kernel_mask%"]"}
|
|
|
|
echo "$kernel_mask" >> /sys/fs/cgroup/cpuset/spdk/cpuset.cpus
|
|
echo "0-1" >> /sys/fs/cgroup/cpuset/spdk/cpuset.mems
|
|
|
|
kernel_vhost_pids=$(ps aux | grep -Po "^root\s+\K(\d+)(?=.*\[vhost-\d+\])")
|
|
for kpid in $kernel_vhost_pids; do
|
|
echo "Limiting kernel vhost pid ${kpid}"
|
|
echo "${kpid}" >> /sys/fs/cgroup/cpuset/spdk/tasks
|
|
done
|
|
fi
|
|
|
|
# Run FIO
|
|
fio_disks=""
|
|
for vm_num in $used_vms; do
|
|
vm_dir=$VM_BASE_DIR/$vm_num
|
|
host_name="VM-$vm_num"
|
|
vm_ssh $vm_num "hostname $host_name"
|
|
vm_start_fio_server $fio_bin $vm_num
|
|
|
|
if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
|
|
vm_check_scsi_location $vm_num
|
|
elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
|
|
vm_check_blk_location $vm_num
|
|
elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
|
|
vm_check_scsi_location $vm_num
|
|
fi
|
|
|
|
if [[ -n "$vm_throttle" ]]; then
|
|
block=$(printf '%s' $SCSI_DISK)
|
|
major_minor=$(vm_ssh "$vm_num" "cat /sys/block/$block/dev")
|
|
vm_ssh "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.read_iops_device"
|
|
vm_ssh "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.write_iops_device"
|
|
fi
|
|
|
|
fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)"
|
|
done
|
|
|
|
# Run FIO traffic
|
|
run_fio $fio_bin --job-file="$fio_job" --out="$TEST_DIR/fio_results" --json $fio_disks &
|
|
fio_pid=$!
|
|
|
|
if $vm_sar_enable; then
|
|
sleep $vm_sar_delay
|
|
mkdir -p $TEST_DIR/fio_results/sar_stats
|
|
pids=""
|
|
for vm_num in $used_vms; do
|
|
vm_ssh "$vm_num" "mkdir -p /root/sar; sar -P ALL $vm_sar_interval $vm_sar_count >> /root/sar/sar_stats_VM${vm_num}.txt" &
|
|
pids+=" $!"
|
|
done
|
|
for j in $pids; do
|
|
wait $j
|
|
done
|
|
for vm_num in $used_vms; do
|
|
vm_scp "$vm_num" "root@127.0.0.1:/root/sar/sar_stats_VM${vm_num}.txt" "$TEST_DIR/fio_results/sar_stats"
|
|
done
|
|
fi
|
|
|
|
wait $fio_pid
|
|
|
|
notice "Shutting down virtual machines..."
|
|
vm_shutdown_all
|
|
|
|
if [[ "$ctrl_type" == "kernel_vhost" ]]; then
|
|
cleanup_kernel_vhost || true
|
|
else
|
|
notice "Shutting down SPDK vhost app..."
|
|
if [[ $use_split == true ]]; then
|
|
cleanup_split_cfg
|
|
else
|
|
cleanup_lvol_cfg
|
|
fi
|
|
spdk_vhost_kill
|
|
fi
|
|
|
|
if [[ -n "$kernel_cpus" ]]; then
|
|
rmdir /sys/fs/cgroup/cpuset/spdk
|
|
fi
|