test/vhost_perf: refactor test scripts to use disk map

Use a configuration file provided via the --disk-map option
instead of creating bdevs and VMs in a fixed ordered sequence
(e.g. 0 to 10).

This allows us to specify:
- which NVMe devices to use (by PCI BDF identifier)
- how to name them in the SPDK NVMe bdev configuration
- how many split or lvol bdevs to create on each device
- which VMs should use the created bdevs

Together with a CPU mask configuration file this gives better
control over resources when running the test (especially for
NUMA optimization, where sequential for/while loops are not
a good approach).

The vm_count and max_disks parameters are removed. They are no
longer needed because the disk map file now controls this.
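
An invocation could then look like this (illustrative paths;
the script is assumed to live at
test/vhost/perf_bench/vhost_perf.sh, and the remaining options
are unchanged by this patch):

  ./test/vhost/perf_bench/vhost_perf.sh \
      --ctrl-type=spdk_vhost_scsi --use-split \
      --fio-job=/path/to/job.fio \
      --disk-map=/path/to/disk_map.cfg \
      --custom-cpu-cfg=/path/to/cpu.cfg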

Example of config file contents:
(BDF,Spdk NvmeBdev name,Split count,VM list)
0000:1b:00.0,Nvme1,2,2 3
0000:89:00.0,Nvme3,4,4 5 6 7
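
The script reads this file into per-column arrays; a simplified
sketch of the parsing loop added by this patch (kernel_vhost
device name lookup omitted):

  readarray disk_cfg < $disk_map
  for line in "${disk_cfg[@]}"; do
      IFS=","
      s=($line)
      disk_cfg_bdfs+=(${s[0]})        # PCI BDF, e.g. 0000:1b:00.0
      disk_cfg_spdk_names+=(${s[1]})  # SPDK NVMe bdev name, e.g. Nvme1
      disk_cfg_splits+=(${s[2]})      # number of split/lvol bdevs
      disk_cfg_vms+=("${s[3]}")       # space-separated list of VM indexes
  done
  unset IFS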

Change-Id: I9fc73458825d8072537aa04880765a048e034ce4
Signed-off-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/464565
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>

@@ -5,7 +5,7 @@ rootdir=$(readlink -f $testdir/../../..)
 source $rootdir/test/common/autotest_common.sh
 source $rootdir/test/vhost/common.sh
-vm_count=1
+vhost_num="0"
 vm_memory=2048
 vm_sar_enable=false
 host_sar_enable=false
@@ -13,19 +13,26 @@ sar_delay="0"
 sar_interval="1"
 sar_count="10"
 vm_throttle=""
-max_disks=""
 ctrl_type="spdk_vhost_scsi"
 use_split=false
 kernel_cpus=""
 run_precondition=false
 lvol_stores=()
 lvol_bdevs=()
+split_bdevs=()
 used_vms=""
 wwpn_prefix="naa.5001405bc6498"
 fio_bin="--fio-bin=/home/sys_sgsw/fio_ubuntu"
 fio_iterations=1
 precond_fio_bin="/usr/src/fio/fio"
+disk_map=""
+disk_cfg_bdfs=()
+disk_cfg_spdk_names=()
+disk_cfg_splits=()
+disk_cfg_vms=()
+disk_cfg_kernel_names=()
 function usage()
 {
@@ -39,10 +46,6 @@ function usage()
 echo " of binary is recommended."
 echo " --fio-job=PATH Fio config to use for test."
 echo " --fio-iterations=INT Number of times to run specified workload."
-echo " --vm-count=INT Total number of virtual machines to launch in this test;"
-echo " Each VM will get one bdev (lvol or split vbdev)"
-echo " to run FIO test."
-echo " Default: 1"
 echo " --vm-memory=INT Amount of RAM memory (in MB) to pass to a single VM."
 echo " Default: 2048 MB"
 echo " --vm-image=PATH OS image to use for running the VMs."
@@ -53,8 +56,6 @@ function usage()
 echo " --sar-interval=INT Interval (seconds) argument for SAR. Default: 1s."
 echo " --sar-count=INT Count argument for SAR. Default: 10."
 echo " --vm-throttle-iops=INT I/Os throttle rate in IOPS for each device on the VMs."
-echo " --max-disks=INT Maximum number of NVMe drives to use in test."
-echo " Default: will use all available NVMes."
 echo " --ctrl-type=TYPE Controller type to use for test:"
 echo " spdk_vhost_scsi - use spdk vhost scsi"
 echo " spdk_vhost_blk - use spdk vhost block"
@@ -66,6 +67,12 @@ function usage()
 echo " --precond-fio-bin FIO binary used for SPDK fio plugin precondition. Default: /usr/src/fio/fio."
 echo " --custom-cpu-cfg=PATH Custom CPU config for test."
 echo " Default: spdk/test/vhost/common/autotest.config"
+echo " --disk-map Disk map for given test. Specify which disks to use, their SPDK name,"
+echo " how many times to split them and which VMs should be attached to created bdevs."
+echo " Example:"
+echo " NVME PCI BDF,Spdk Bdev Name,Split Count,VM List"
+echo " 0000:1a:00.0,Nvme0,2,0 1"
+echo " 0000:1b:00.0,Nvme1,2,2 3"
 echo "-x set -x for script debug"
 exit 0
 }
@@ -88,17 +95,16 @@ function cleanup_lvol_cfg()
 function cleanup_split_cfg()
 {
 notice "Removing split vbdevs"
-for (( i=0; i<max_disks; i++ ));do
-$rpc_py bdev_split_delete Nvme${i}n1
+for disk in "${disk_cfg_spdk_names[@]}"; do
+$rpc_py bdev_split_delete ${disk}n1
 done
 }
 function cleanup_parted_config()
 {
-local disks
-disks=$(find /dev/ -maxdepth 1 -name 'nvme*n1' | sort --version-sort)
-for disk in $disks; do
-parted -s $disk rm 1
+notice "Removing parted disk configuration"
+for disk in "${disk_cfg_kernel_names[@]}"; do
+parted -s /dev/${disk}n1 rm 1
 done
 }
@@ -109,6 +115,36 @@ function cleanup_kernel_vhost()
 cleanup_parted_config
 }
+function create_vm() {
+vm_num=$1
+setup_cmd="vm_setup --disk-type=$ctrl_type --force=$vm_num --memory=$vm_memory --os=$VM_IMAGE"
+if [[ "$ctrl_type" == "kernel_vhost" ]]; then
+x=$(printf %03d $vm_num)
+setup_cmd+=" --disks=${wwpn_prefix}${x}"
+else
+setup_cmd+=" --disks=0"
+fi
+$setup_cmd
+used_vms+=" $vm_num"
+echo "Added to used vms"
+echo $used_vms
+}
+function create_spdk_controller() {
+vm_num=$1
+bdev=$2
+if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
+$rpc_py vhost_create_scsi_controller naa.0.$vm_num
+notice "Created vhost scsi controller naa.0.$vm_num"
+$rpc_py vhost_scsi_controller_add_target naa.0.$vm_num 0 $bdev
+notice "Added LUN 0/$bdev to controller naa.0.$vm_num"
+elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
+$rpc_py vhost_create_blk_controller naa.0.$vm_num $bdev
+notice "Created vhost blk controller naa.0.$vm_num $bdev"
+fi
+}
 while getopts 'xh-:' optchar; do
 case "$optchar" in
 -)
@@ -117,7 +153,6 @@ while getopts 'xh-:' optchar; do
 fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
 fio-job=*) fio_job="${OPTARG#*=}" ;;
 fio-iterations=*) fio_iterations="${OPTARG#*=}" ;;
-vm-count=*) vm_count="${OPTARG#*=}" ;;
 vm-memory=*) vm_memory="${OPTARG#*=}" ;;
 vm-image=*) VM_IMAGE="${OPTARG#*=}" ;;
 vm-sar-enable) vm_sar_enable=true ;;
@@ -126,13 +161,13 @@ while getopts 'xh-:' optchar; do
 sar-interval=*) sar_interval="${OPTARG#*=}" ;;
 sar-count=*) sar_count="${OPTARG#*=}" ;;
 vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
-max-disks=*) max_disks="${OPTARG#*=}" ;;
 ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
 use-split) use_split=true ;;
 run-precondition) run_precondition=true ;;
 precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
 limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
 custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
+disk-map=*) disk_map="${OPTARG#*=}" ;;
 *) usage $0 "Invalid argument '$OPTARG'" ;;
 esac
 ;;
@@ -147,6 +182,10 @@ rpc_py="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"
 if [[ -n $custom_cpu_cfg ]]; then
 source $custom_cpu_cfg
+vhost_reactor_mask="vhost_${vhost_num}_reactor_mask"
+vhost_reactor_mask="${!vhost_reactor_mask}"
+vhost_master_core="vhost_${vhost_num}_master_core"
+vhost_master_core="${!vhost_master_core}"
 fi
 if [[ -z $fio_job ]]; then
@@ -155,40 +194,11 @@ if [[ -z $fio_job ]]; then
 fi
 trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
-notice "Get NVMe disks:"
-nvmes=($(iter_pci_class_code 01 08 02))
-if [[ -z $max_disks ]]; then
-max_disks=${#nvmes[@]}
+if [[ -z $disk_map ]]; then
+fail "No disk map provided for test. Exiting."
 fi
-if [[ ${#nvmes[@]} -lt max_disks ]]; then
-fail "Number of NVMe drives (${#nvmes[@]}) is lower than number of requested disks for test ($max_disks)"
-fi
-# Calculate number of needed splits per NVMe
-# so that each VM gets it's own bdev during test.
-splits=()
-if [[ $vm_count -le $max_disks ]]; then
-for i in $(seq 0 $((max_disks - 1))); do
-splits+=("1")
-done
-else
-#Calculate least minimum number of splits on each disks
-for i in $(seq 0 $((max_disks - 1))); do
-splits+=( $((vm_count / max_disks)) )
-done
-# Split up the remainder
-for i in $(seq 0 $((vm_count % max_disks - 1))); do
-(( splits[i]++ ))
-done
-fi
-notice "Preparing NVMe setup..."
-notice "Using $max_disks physical NVMe drives"
-notice "Nvme split list: ${splits[*]}"
 # ===== Precondition NVMes if specified =====
 if [[ $run_precondition == true ]]; then
 # Using the same precondition routine possible for lvols thanks
@@ -204,102 +214,136 @@ if [[ $run_precondition == true ]]; then
 --iodepth=32 --filename="${fio_filename}" || true
 fi
-# ===== Prepare NVMe splits & run vhost process =====
+set +x
+readarray disk_cfg < $disk_map
+for line in "${disk_cfg[@]}"; do
+echo $line
+IFS=","
+s=($line)
+disk_cfg_bdfs+=(${s[0]})
+disk_cfg_spdk_names+=(${s[1]})
+disk_cfg_splits+=(${s[2]})
+disk_cfg_vms+=("${s[3]}")
+# Find kernel nvme names
 if [[ "$ctrl_type" == "kernel_vhost" ]]; then
+tmp=$(find /sys/devices/pci* -name ${s[0]} -print0 | xargs sh -c 'ls $0/nvme')
+disk_cfg_kernel_names+=($tmp)
+IFS=" "
+fi
+done
+unset IFS
+set -x
+if [[ "$ctrl_type" == "kernel_vhost" ]]; then
+notice "Configuring kernel vhost..."
 trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
 # Split disks using parted for kernel vhost
 newline=$'\n'
-for (( i=0; i<max_disks; i++ ));do
-parted -s /dev/nvme${i}n1 mklabel msdos
-parted -s /dev/nvme${i}n1 mkpart extended 2048s 100%
-part_size=$((100/${splits[$i]})) # Split 100% of disk into roughly even parts
-echo " Creating ${splits[$i]} partitions of relative disk size ${part_size}"
-for p in $(seq 0 $((${splits[$i]} - 1))); do
+backstores=()
+for (( i=0; i<${#disk_cfg_kernel_names[@]}; i++ )); do
+nvme=${disk_cfg_kernel_names[$i]}
+splits=${disk_cfg_splits[$i]}
+notice " Creating extended partition on disk /dev/${nvme}n1"
+parted -s /dev/${nvme}n1 mklabel msdos
+parted -s /dev/${nvme}n1 mkpart extended 2048s 100%
+part_size=$((100/${disk_cfg_splits[$i]})) # Split 100% of disk into roughly even parts
+echo " Creating ${splits} partitions of relative disk size ${part_size}"
+for p in $(seq 0 $((splits - 1))); do
 p_start=$((p*part_size))
 p_end=$((p_start+part_size))
-parted -s /dev/nvme${i}n1 mkpart logical ${p_start}% ${p_end}%
+parted -s /dev/${nvme}n1 mkpart logical ${p_start}% ${p_end}%
+sleep 3
 done
-done
-sleep 1
 # Prepare kernel vhost configuration
 # Below grep: match only NVMe partitions which are not "Extended" type.
 # For example: will match nvme0n1p15 but not nvme0n1p1
-partitions=$(find /dev/ -maxdepth 1 -name 'nvme*' | sort --version-sort | grep -P 'p(?!1$)\d+')
-backstores=()
+partitions=$(find /dev -name "${nvme}n1*" | sort --version-sort | grep -P 'p(?!1$)\d+')
 # Create block backstores for vhost kernel process
 for p in $partitions; do
 backstore_name=$(basename $p)
 backstores+=("$backstore_name")
 targetcli backstores/block create $backstore_name $p
 done
+partitions=($partitions)
 # Create kernel vhost controllers and add LUNs
-for ((i=0; i<${#backstores[*]}; i++)); do
+# Setup VM configurations
+vms_to_run=(${disk_cfg_vms[i]})
+for (( j=0; j<${#vms_to_run[@]}; j++ )); do
 # WWPN prefix misses 3 characters. Need to complete it
 # using block backstore number
-x=$(printf %03d $i)
+x=$(printf %03d ${vms_to_run[$j]})
 wwpn="${wwpn_prefix}${x}"
 targetcli vhost/ create $wwpn
-targetcli vhost/$wwpn/tpg1/luns create /backstores/block/${backstores[$i]}
+targetcli vhost/$wwpn/tpg1/luns create /backstores/block/$(basename ${partitions[$j]})
+create_vm ${vms_to_run[j]}
+sleep 1
 done
+done
+targetcli ls
 else
-# Run vhost process and prepare split vbdevs or lvol bdevs
-notice "running SPDK vhost"
-vhost_run 0
+notice "Configuring SPDK vhost..."
+vhost_run "${vhost_num}" "--no-gen-nvme" "-p ${vhost_master_core}" "-m ${vhost_reactor_mask}"
 notice "..."
 if [[ $use_split == true ]]; then
-notice "Using split vbdevs"
+notice "Configuring split bdevs configuration..."
 trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
-split_bdevs=()
-for (( i=0; i<max_disks; i++ ));do
-out=$($rpc_py bdev_split_create Nvme${i}n1 ${splits[$i]})
-for s in $(seq 0 $((${splits[$i]}-1))); do
-split_bdevs+=("Nvme${i}n1p${s}")
+for (( i=0; i<${#disk_cfg_bdfs[@]}; i++ )); do
+nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
+notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"
+splits=$($rpc_py bdev_split_create $nvme_bdev ${disk_cfg_splits[$i]})
+splits=($splits)
+notice "Created splits: ${splits[*]} on Bdev ${nvme_bdev}"
+for s in "${splits[@]}"; do
+split_bdevs+=($s)
 done
+vms_to_run=(${disk_cfg_vms[i]})
+for (( j=0; j<${#vms_to_run[@]}; j++ )); do
+notice "Setting up VM ${vms_to_run[j]}"
+create_spdk_controller "${vms_to_run[j]}" ${splits[j]}
+create_vm ${vms_to_run[j]}
+done
+echo " "
 done
 bdevs=("${split_bdevs[@]}")
 else
-notice "Using logical volumes"
+notice "Configuring LVOLs..."
 trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
-for (( i=0; i<max_disks; i++ ));do
-ls_guid=$($rpc_py bdev_lvol_create_lvstore Nvme${i}n1 lvs_$i --clear-method none)
+for (( i=0; i<${#disk_cfg_bdfs[@]}; i++ )); do
+nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
+notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"
+ls_guid=$($rpc_py bdev_lvol_create_lvstore $nvme_bdev lvs_$i --clear-method none)
 lvol_stores+=("$ls_guid")
-for (( j=0; j<${splits[$i]}; j++)); do
+notice "Created Lvol Store: $ls_guid on Bdev $nvme_bdev"
+vms_to_run=(${disk_cfg_vms[i]})
+for (( j=0; j<${disk_cfg_splits[$i]}; j++)); do
 free_mb=$(get_lvs_free_mb "$ls_guid")
-size=$((free_mb / (${splits[$i]}-j) ))
+size=$((free_mb / ((${disk_cfg_splits[$i]}-j)) ))
 lb_name=$($rpc_py bdev_lvol_create -u $ls_guid lbd_$j $size --clear-method none)
 lvol_bdevs+=("$lb_name")
-done
-done
-bdevs=("${lvol_bdevs[@]}")
-fi
-fi
-# Prepare VMs and controllers
-for (( i=0; i<vm_count; i++)); do
-vm="vm_$i"
-setup_cmd="vm_setup --disk-type=$ctrl_type --force=$i --memory=$vm_memory"
-setup_cmd+=" --os=$VM_IMAGE"
-if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
-$rpc_py vhost_create_scsi_controller naa.0.$i
-$rpc_py vhost_scsi_controller_add_target naa.0.$i 0 ${bdevs[$i]}
-setup_cmd+=" --disks=0"
-elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
-$rpc_py vhost_create_blk_controller naa.$i.$i ${bdevs[$i]}
-setup_cmd+=" --disks=$i"
-elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
-x=$(printf %03d $i)
-setup_cmd+=" --disks=${wwpn_prefix}${x}"
-fi
-$setup_cmd
-used_vms+=" $i"
+notice "Created LVOL Bdev $lb_name on Lvol Store $ls_guid on Bdev $nvme_bdev"
+notice "Setting up VM ${vms_to_run[j]}"
+create_spdk_controller "${vms_to_run[j]}" ${lb_name}
+create_vm ${vms_to_run[j]}
 done
+echo " "
+done
+$rpc_py bdev_lvol_get_lvstores
+fi
+$rpc_py bdev_get_bdevs
+$rpc_py vhost_get_controllers
+fi
 # Start VMs
 # Run VMs
@@ -383,7 +427,6 @@ for i in $(seq 1 $fio_iterations); do
 done
 fi
-wait $fio_pid
 mv $VHOST_DIR/fio_results/$fio_log_fname $VHOST_DIR/fio_results/$fio_log_fname.$i
 sleep 1
@@ -401,7 +444,7 @@ else
 else
 cleanup_lvol_cfg
 fi
-vhost_kill 0
+vhost_kill "${vhost_num}"
 fi
 if [[ -n "$kernel_cpus" ]]; then