Spdk/test/nvme/perf/run_perf.sh
paul luse eb53c23236 add (c) and SPDX header to bash files as needed
per Intel policy to include file commit date using git cmd
below.  The policy does not apply to non-Intel (C) notices.

git log --follow -C90% --format=%ad --date default <file> | tail -1

and then pull just the year from the result.

Intel copyrights were not added to files where Intel either had
no contribution or the contribution lacked substance (i.e. license
header updates, formatting changes, etc.).

For intel copyrights added, --follow and -C95% were used.

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: I2ef86976095b88a9bf5b1003e59f3943cd6bbe4c
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15209
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Krzysztof Karas <krzysztof.karas@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
2022-11-29 08:27:51 +00:00

433 lines
17 KiB
Bash
Executable File

#!/usr/bin/env bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2018 Intel Corporation
# All rights reserved.
#
# Abort on the first unhandled command failure.
set -e

# Dir variables and sourcing common files.
# Path expansions are quoted so that a checkout located under a directory
# whose name contains whitespace or glob characters is not split or expanded.
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../../..")
# The three directories below are not referenced in the visible part of this
# script; presumably they are consumed by the sourced helpers - verify.
plugin_dir=$rootdir/build/fio
bdevperf_dir=$rootdir/examples/bdev/bdevperf
nvmeperf_dir=$rootdir/build/examples
source "$testdir/common.sh"
source "$rootdir/scripts/common.sh" || exit 1
source "$rootdir/test/common/autotest_common.sh"
# Global & default variables.
# Every value below is a default; most can be overridden by the command line
# options handled further down in this script.

# Extra fio command line options for each kernel-based --driver choice.
declare -A KERNEL_ENGINES=(
	["kernel-libaio"]="--ioengine=libaio"
	["kernel-classic-polling"]="--ioengine=pvsync2 --hipri=100"
	["kernel-hybrid-polling"]="--ioengine=pvsync2 --hipri=100"
	["kernel-io-uring"]="--ioengine=io_uring"
)

# Workload parameters
RW="randrw"
MIX="100"
IODEPTH="256"
BLK_SIZE="4096"
RUNTIME="600"
RAMP_TIME="30"
NUMJOBS="1"
REPEAT_NO="3"
GTOD_REDUCE="false"
SAMPLING_INT="0"
LATENCY_LOG="false"
IO_BATCH_SUBMIT="0"
IO_BATCH_COMPLETE="0"
FIO_BIN="${CONFIG_FIO_SOURCE_DIR}/fio"
FIO_FNAME_STRATEGY="group"

# Scratch files written next to this script while a test is running
TMP_RESULT_FILE="${testdir}/result.json"
TMP_BPF_FILE="${testdir}/bpftraces.txt"

# Test setup parameters
MAIN_CORE=
PLUGIN="nvme"
DISKCFG=
BDEV_CACHE=
BDEV_POOL=
DISKNO="ALL"
CPUS_ALLOWED="1"
NOIOSCALING="false"
PRECONDITIONING="true"
CPUFREQ=

# Other options
PERFTOP="false"
DPDKMEM="false"
declare -a BPFTRACES=()

# Timestamp embedded in the names of all result files of this run
DATE=$(date +'%m_%d_%Y_%H%M%S')
#######################################
# Print an optional message followed by the help text.
# Globals:
#   Reads the default-value variables (RW, MIX, IODEPTH, ...) to show the
#   current defaults.
# Arguments:
#   $1 - path of the script; only its basename is shown in the "Usage:" line
#   $2 - optional message, printed followed by an empty line before the help
# Outputs:
#   Help text on stdout.
# Side effects:
#   xtrace is switched off while printing and switched on unconditionally
#   before returning.
#######################################
function usage() {
	set +x
	local prog
	# Quoted so a script path containing whitespace still yields its basename.
	prog=$(basename -- "$1") || prog=$1
	if [[ -n $2 ]]; then
		# printf instead of echo: a message such as "-n" is printed verbatim.
		printf '%s\n\n' "$2"
	fi
	cat << EOF
Run NVMe PMD/BDEV performance test. Change options for easier debug and setup configuration
Usage: $prog [options]
-h, --help Print help and exit

Workload parameters:
 --rw=STR Type of I/O pattern. Accepted values are randrw,rw. [default=$RW]
 --rwmixread=INT Percentage of a mixed workload that should be reads. [default=$MIX]
 --iodepth=INT Number of I/Os to keep in flight against the file. [default=$IODEPTH]
 --block-size=INT The block size in bytes used for I/O units. [default=$BLK_SIZE]
 --run-time=TIME[s] Tell fio to run the workload for the specified period of time. [default=$RUNTIME]
 --ramp-time=TIME[s] Fio will run the specified workload for this amount of time before
 logging any performance numbers. [default=$RAMP_TIME]. Applicable only for fio-based tests.
 --numjobs=INT Create the specified number of clones of this job. [default=$NUMJOBS]
 Applicable only for fio-based tests.
 --repeat-no=INT How many times to repeat workload test. [default=$REPEAT_NO]
 Test result will be an average of repeated test runs.
 --gtod-reduce Enable fio gtod_reduce option. [default=$GTOD_REDUCE]
 --sampling-int=INT Value for fio log_avg_msec parameters [default=$SAMPLING_INT]
 --latency-log Write latency log file using write_lat_log fio option [default=$LATENCY_LOG]
 --io-batch-submit=INT Value for iodepth_batch_submit fio option [default=$IO_BATCH_SUBMIT]
 --io-batch-complete=INT Value for iodepth_batch_complete fio option [default=$IO_BATCH_COMPLETE]
 --fio-bin=PATH Path to fio binary. [default=$FIO_BIN]
 Applicable only for fio-based tests.
 --fio-fname-strategy=STR Use 'group' to group filenames under job section with common CPU or
 use 'split' to create a separate fio job section for each filename [default=$FIO_FNAME_STRATEGY]

Test setup parameters:
 --driver=STR Selects tool used for testing. Choices available:
 - spdk-perf-nvme (SPDK nvme perf)
 - spdk-perf-bdev (SPDK bdev perf)
 - spdk-perf-xnvme-bdev (SPDK xnvme bdev perf with io_uring io_mechanism)
 - spdk-plugin-nvme (SPDK nvme fio plugin)
 - spdk-plugin-bdev (SPDK bdev fio plugin)
 - spdk-plugin-bdev-xnvme (SPDK bdev fio plugin with xnvme bdevs)
 - kernel-classic-polling
 - kernel-hybrid-polling
 - kernel-libaio
 - kernel-io-uring
 --disk-config Configuration file containing PCI BDF addresses of NVMe disks to use in test.
 It consists a single column of PCI addresses. SPDK Bdev names will be assigned
 and Kernel block device names detected.
 Lines starting with # are ignored as comments.
 --bdev-io-cache-size Set IO cache size for for SPDK bdev subsystem.
 --bdev-io-pool-size Set IO pool size for for SPDK bdev subsystem.
 --max-disk=INT,ALL Number of disks to test on, this will run multiple workloads with increasing number of disk each run.
 If =ALL then test on all found disk. [default=$DISKNO]
 --cpu-allowed=INT/PATH Comma-separated list of CPU cores used to run the workload. Ranges allowed.
 Can also point to a file containing list of CPUs. [default=$CPUS_ALLOWED]
 --no-preconditioning Skip preconditioning
 --no-io-scaling Do not scale iodepth for each device in SPDK fio plugin. [default=$NOIOSCALING]
 --cpu-frequency=INT Run tests with CPUs set to a desired frequency. 'intel_pstate=disable' must be set in
 GRUB options. You can use 'cpupower frequency-info' and 'cpupower frequency-set' to
 check list of available frequencies. Example: --cpu-frequency=1100000.
 --main-core main (primary) core for DPDK (for bdevperf only).

Other options:
 --perftop Run perftop measurements on the same CPU cores as specified in --cpu-allowed option.
 --dpdk-mem-stats Dump DPDK memory stats during the test.
 --bpf-traces=LIST Comma delimited list of .bt scripts for enabling BPF traces.
 List of .bt scripts available in spdk/scripts/bpf.
 Only for spdk-perf-bdev
EOF
	set -x
}
# Parse command line options.
# getopts only understands short options, so '-' is declared as a short option
# taking an argument ('-:'). "--name=value" then arrives as optchar '-' with
# OPTARG="name=value" and is dispatched by the inner case statement.
# "${OPTARG#*=}" strips everything up to and including the first '='.
while getopts 'h-:' optchar; do
	case "$optchar" in
		-)
			case "$OPTARG" in
				help)
					usage "$0"
					exit 0
					;;
				rw=*) RW="${OPTARG#*=}" ;;
				rwmixread=*) MIX="${OPTARG#*=}" ;;
				iodepth=*) IODEPTH="${OPTARG#*=}" ;;
				block-size=*) BLK_SIZE="${OPTARG#*=}" ;;
				run-time=*) RUNTIME="${OPTARG#*=}" ;;
				ramp-time=*) RAMP_TIME="${OPTARG#*=}" ;;
				numjobs=*) NUMJOBS="${OPTARG#*=}" ;;
				repeat-no=*) REPEAT_NO="${OPTARG#*=}" ;;
				gtod-reduce) GTOD_REDUCE=true ;;
				sampling-int=*) SAMPLING_INT="${OPTARG#*=}" ;;
				io-batch-submit=*) IO_BATCH_SUBMIT="${OPTARG#*=}" ;;
				io-batch-complete=*) IO_BATCH_COMPLETE="${OPTARG#*=}" ;;
				fio-bin=*) FIO_BIN="${OPTARG#*=}" ;;
				fio-fname-strategy=*)
					FIO_FNAME_STRATEGY="${OPTARG#*=}"
					# The 'split' strategy implies --no-io-scaling.
					if [[ "$FIO_FNAME_STRATEGY" == "split" ]]; then
						NOIOSCALING=true
					fi
					;;
				driver=*) PLUGIN="${OPTARG#*=}" ;;
				disk-config=*)
					DISKCFG="${OPTARG#*=}"
					if [[ ! -f "$DISKCFG" ]]; then
						echo "Disk configuration file $DISKCFG does not exist!"
						exit 1
					fi
					;;
				bdev-io-cache-size=*) BDEV_CACHE="${OPTARG#*=}" ;;
				bdev-io-pool-size=*) BDEV_POOL="${OPTARG#*=}" ;;
				max-disk=*) DISKNO="${OPTARG#*=}" ;;
				cpu-allowed=*)
					CPUS_ALLOWED="${OPTARG#*=}"
					# The value may name a file holding the CPU list.
					if [[ -f "$CPUS_ALLOWED" ]]; then
						CPUS_ALLOWED=$(cat "$CPUS_ALLOWED")
					fi
					;;
				no-preconditioning) PRECONDITIONING=false ;;
				no-io-scaling) NOIOSCALING=true ;;
				cpu-frequency=*) CPUFREQ="${OPTARG#*=}" ;;
				perftop) PERFTOP=true ;;
				dpdk-mem-stats) DPDKMEM=true ;;
				bpf-traces=*) IFS="," read -r -a BPFTRACES <<< "${OPTARG#*=}" ;;
				latency-log) LATENCY_LOG=true ;;
				main-core=*) MAIN_CORE="${OPTARG#*=}" ;;
				*)
					# The message must be the second argument of usage().
					usage "$0" "Invalid argument '$OPTARG'"
					exit 1
					;;
			esac
			;;
		h)
			usage "$0"
			exit 0
			;;
		*)
			usage "$0" "Invalid argument '$optchar'"
			exit 1
			;;
	esac
done
# Output locations: one directory and one CSV file per run, named after the
# workload parameters and the start timestamp.
result_dir=$testdir/results/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}
result_file=$result_dir/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.csv
mkdir -p "$result_dir"
unset iops_disks bw mean_lat_disks_usec p90_lat_disks_usec p99_lat_disks_usec p99_99_lat_disks_usec stdev_disks_usec

# Cleanup on failure or termination. kill is skipped when perf was never
# started, because kill without a PID is a usage error.
trap 'rm -f *.state $testdir/bdev.conf; [[ -n $perf_pid ]] && kill $perf_pid; wait $dpdk_mem_pid; print_backtrace' ERR SIGTERM SIGABRT

# Drivers whose name contains "xnvme" or "bdev" need a bdev configuration.
# The helpers come from the sourced files; presumably they write
# $testdir/bdev.conf, which is printed below when non-empty - verify.
if [[ "$PLUGIN" =~ "xnvme" ]]; then
	create_spdk_xnvme_bdev_conf "$BDEV_CACHE" "$BDEV_POOL"
elif [[ "$PLUGIN" =~ "bdev" ]]; then
	create_spdk_bdev_conf "$BDEV_CACHE" "$BDEV_POOL"
fi
if [[ -s $testdir/bdev.conf ]]; then
	echo "INFO: Generated bdev.conf file:"
	cat "$testdir/bdev.conf"
fi

verify_disk_number

# xnvme drivers query the disks without passing the driver name.
if [[ "$PLUGIN" =~ "xnvme" ]]; then
	DISK_NAMES=$(get_disks)
	DISKS_NUMA=$(get_numa_node "" "$DISK_NAMES")
else
	DISK_NAMES=$(get_disks $PLUGIN)
	DISKS_NUMA=$(get_numa_node $PLUGIN "$DISK_NAMES")
fi

# Number of cores = number of words returned by get_cores.
CORES=$(get_cores "$CPUS_ALLOWED")
NO_CORES_ARRAY=($CORES)
NO_CORES=${#NO_CORES_ARRAY[@]}

# Write the CSV preamble only now that NO_CORES is known. Every field is
# quoted so that an empty value yields an empty column instead of shifting
# the remaining values to the left.
echo "run-time,ramp-time,fio-plugin,QD,block-size,num-cpu-cores,workload,workload-mix" > "$result_file"
printf "%s,%s,%s,%s,%s,%s,%s,%s\n" "$RUNTIME" "$RAMP_TIME" "$PLUGIN" "$IODEPTH" "$BLK_SIZE" "$NO_CORES" "$RW" "$MIX" >> "$result_file"
echo "num_of_disks,iops,avg_lat[usec],p90[usec],p99[usec],p99.99[usec],stdev[usec],avg_slat[usec],avg_clat[usec],bw[Kib/s]" >> "$result_file"

if $PRECONDITIONING; then
	preconditioning
fi
# Host preparation for drivers whose name contains "kernel" or "xnvme":
# run setup.sh reset, pick the fio ioengine options for the driver and tune
# the block layer of every test disk through sysfs.
if [[ $PLUGIN == *kernel* || $PLUGIN == *xnvme* ]]; then
	$rootdir/scripts/setup.sh reset
	# Empty for drivers that have no entry in KERNEL_ENGINES.
	fio_ioengine_opt="${KERNEL_ENGINES[$PLUGIN]}"

	case "$PLUGIN" in
		kernel-classic-polling)
			for disk in $DISK_NAMES; do
				echo -1 > /sys/block/$disk/queue/io_poll_delay
			done
			;;
		kernel-hybrid-polling)
			for disk in $DISK_NAMES; do
				echo 0 > /sys/block/$disk/queue/io_poll_delay
			done
			;;
		kernel-io-uring | *xnvme*)
			# Reload the nvme module with poll queues enabled.
			modprobe -rv nvme
			modprobe nvme poll_queues=8
			wait_for_nvme_reload $DISK_NAMES

			# Save the current queue parameters of all disks first; they are
			# written back from $backup_dir at the end of the script.
			backup_dir="/tmp/nvme_param_bak"
			mkdir -p $backup_dir
			for disk in $DISK_NAMES; do
				echo "INFO: Backing up device parameters for $disk"
				sysfs=/sys/block/$disk/queue
				mkdir -p $backup_dir/$disk
				for param in iostats rq_affinity nomerges io_poll_delay; do
					cat $sysfs/$param > $backup_dir/$disk/$param
				done
			done

			# Then apply the values used for the test (name=value pairs).
			for disk in $DISK_NAMES; do
				echo "INFO: Setting device parameters for $disk"
				sysfs=/sys/block/$disk/queue
				for setting in iostats=0 rq_affinity=0 nomerges=2 io_poll_delay=-1; do
					echo "${setting#*=}" > $sysfs/${setting%%=*}
				done
			done
			;;
	esac
fi
# Remember the governor that is active before the test touches it.
cpu_governor="$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)"

# Without --cpu-frequency the performance governor is used. With it, a fixed
# frequency is set through the userspace governor, which is only attempted
# when intel_pstate=disable is present on the kernel command line.
if [[ -z "$CPUFREQ" ]]; then
	cpupower frequency-set -g performance
elif [[ "$(cat /proc/cmdline)" =~ "intel_pstate=disable" ]]; then
	cpupower frequency-set -g userspace
	cpupower frequency-set -f $CPUFREQ
else
	echo "ERROR: Cannot set custom CPU frequency for test. intel_pstate=disable not in boot options."
	# Deliberate failure: with set -e this ends the script.
	false
fi

current_governor=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
echo "INFO: Using $current_governor cpu governor for test."
# Optional background monitors started before the workload runs.

# --perftop: record perf samples on the cores given by --cpu-allowed.
# perf_pid is used later to stop the recording.
if $PERFTOP; then
	echo "INFO: starting perf record on cores $CPUS_ALLOWED"
	perf record -C $CPUS_ALLOWED -o "$testdir/perf.data" &
	perf_pid=$!
fi

# --dpdk-mem-stats: request DPDK memory stats over RPC half way through the
# run time. For drivers whose name does not contain "perf" the ramp time is
# added to the delay.
if $DPDKMEM; then
	echo "INFO: waiting to generate DPDK memory usage"
	if [[ "$PLUGIN" =~ "perf" ]]; then
		wait_time=$((RUNTIME / 2))
	else
		wait_time=$((RUNTIME / 2 + RAMP_TIME))
	fi
	{
		sleep $wait_time
		echo "INFO: generating DPDK memory usage"
		$rootdir/scripts/rpc.py env_dpdk_get_mem_stats
	} &
	dpdk_mem_pid=$!
fi
# Accumulators: every repetition adds its results to these sums; the sums
# that are reported get divided by REPEAT_NO after the loop.
iops_disks=0
bw=0
min_lat_disks_usec=0
max_lat_disks_usec=0
mean_lat_disks_usec=0
p90_lat_disks_usec=0
p99_lat_disks_usec=0
p99_99_lat_disks_usec=0
stdev_disks_usec=0
mean_slat_disks_usec=0
mean_clat_disks_usec=0

# Run each workload $REPEAT_NO times.
# NOTE(review): bc is called with the expression as its argument, which plain
# bc(1) would treat as a file name - presumably the sourced helpers provide a
# bc wrapper; verify.
# Per-iteration artifacts are copied to $result_dir with the disk count
# (DISKNO) and the iteration index (j) in their names.
for ((j = 0; j < REPEAT_NO; j++)); do
	if [[ $PLUGIN == "spdk-perf-bdev" || $PLUGIN =~ "xnvme-bdev" ]]; then
		# bdevperf based drivers: only IOPS and bandwidth are collected.
		run_bdevperf > $TMP_RESULT_FILE
		read -r iops bandwidth <<< $(get_bdevperf_results)
		iops_disks=$(bc "$iops_disks + $iops")
		bw=$(bc "$bw + $bandwidth")
		cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${DISKNO}_disks_${j}.output
		if [[ -f $TMP_BPF_FILE ]]; then
			mv $TMP_BPF_FILE $result_dir/bpftraces_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${DISKNO}_disks_${j}.txt
		fi
	elif [[ $PLUGIN == "spdk-perf-nvme" ]]; then
		# SPDK nvme perf: IOPS, bandwidth and mean/min/max latency.
		run_nvmeperf $DISKNO > $TMP_RESULT_FILE
		read -r iops bandwidth mean_lat min_lat max_lat <<< $(get_nvmeperf_results)
		iops_disks=$(bc "$iops_disks+$iops")
		bw=$(bc "$bw+$bandwidth")
		mean_lat_disks_usec=$(bc "$mean_lat_disks_usec + $mean_lat")
		min_lat_disks_usec=$(bc "$min_lat_disks_usec + $min_lat")
		max_lat_disks_usec=$(bc "$max_lat_disks_usec + $max_lat")
		cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${DISKNO}_disks_${j}.output
	else
		# fio based drivers (SPDK fio plugins and kernel ioengines).
		create_fio_config $DISKNO $PLUGIN "$DISK_NAMES" "$DISKS_NUMA" "$CORES"
		if $LATENCY_LOG; then
			write_log_opt="--write_lat_log=$result_dir/perf_lat_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}_${DISKNO}disks_${j}"
		fi
		# $write_log_opt and $fio_ioengine_opt stay unquoted on purpose: they
		# may be empty or hold several space separated options.
		if [[ "$PLUGIN" =~ "spdk-plugin" ]]; then
			run_spdk_nvme_fio $PLUGIN "--output=$TMP_RESULT_FILE" $write_log_opt
		else
			run_nvme_fio $fio_ioengine_opt "--output=$TMP_RESULT_FILE" $write_log_opt
		fi

		#Store values for every number of used disks
		#Use recalculated value for mixread param in case rw mode is not rw.
		# An RW name containing "read" counts as 100% reads, one containing
		# "write" as 0% reads; anything else keeps the configured mix.
		rwmixread=$MIX
		if [[ $RW = *"read"* ]]; then
			rwmixread=100
		elif [[ $RW = *"write"* ]]; then
			rwmixread=0
		fi

		read -r iops bandwidth mean_lat_usec p90_lat_usec p99_lat_usec p99_99_lat_usec \
			stdev_usec mean_slat_usec mean_clat_usec <<< $(get_results $rwmixread)
		iops_disks=$(bc "$iops_disks + $iops")
		mean_lat_disks_usec=$(bc "$mean_lat_disks_usec + $mean_lat_usec")
		p90_lat_disks_usec=$(bc "$p90_lat_disks_usec + $p90_lat_usec")
		p99_lat_disks_usec=$(bc "$p99_lat_disks_usec + $p99_lat_usec")
		p99_99_lat_disks_usec=$(bc "$p99_99_lat_disks_usec + $p99_99_lat_usec")
		stdev_disks_usec=$(bc "$stdev_disks_usec + $stdev_usec")
		mean_slat_disks_usec=$(bc "$mean_slat_disks_usec + $mean_slat_usec")
		mean_clat_disks_usec=$(bc "$mean_clat_disks_usec + $mean_clat_usec")
		bw=$(bc "$bw + $bandwidth")

		cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${DISKNO}_disks_${j}.json
		cp $testdir/config.fio $result_dir/config_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${DISKNO}_disks_${j}.fio
		rm -f $testdir/config.fio
	fi
done
# Collect the output of the optional monitors into the result directory.
# Both reports share the same workload/timestamp suffix in their file names.
report_suffix=${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}

if $PERFTOP; then
	echo "INFO: Stopping perftop measurements."
	kill $perf_pid
	# The exit status of the killed perf process is ignored on purpose.
	wait $perf_pid || true
	perf report -i "$testdir/perf.data" > $result_dir/perftop_${report_suffix}.txt
	rm -f "$testdir/perf.data"
fi

if $DPDKMEM; then
	# Move the dump from its fixed location /tmp/spdk_mem_dump.txt.
	mv "/tmp/spdk_mem_dump.txt" $result_dir/spdk_mem_dump_${report_suffix}.txt
	echo "INFO: DPDK memory usage saved in $result_dir"
fi
#Write results to csv file
# Each accumulated sum is divided by the number of repetitions and stored
# back into the same variable. IOPS and bandwidth are averaged for every
# driver.
for metric in iops_disks bw; do
	avg=$(bc "${!metric} / $REPEAT_NO")
	printf -v "$metric" '%s' "$avg"
done

if [[ ("$PLUGIN" =~ "plugin" || "$PLUGIN" =~ "kernel") && ! $PLUGIN =~ "xnvme-bdev" ]]; then
	# fio based drivers: average the latency statistics as well.
	for metric in mean_lat_disks_usec p90_lat_disks_usec p99_lat_disks_usec \
		p99_99_lat_disks_usec stdev_disks_usec mean_slat_disks_usec mean_clat_disks_usec; do
		avg=$(bc "${!metric} / $REPEAT_NO")
		printf -v "$metric" '%s' "$avg"
	done
elif [[ "$PLUGIN" == "spdk-perf-nvme" ]]; then
	# Only the mean latency is averaged and reported for this driver.
	mean_lat_disks_usec=$(bc "$mean_lat_disks_usec/$REPEAT_NO")
fi

# One data row, in the column order of the header written to the CSV earlier.
printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" \
	${DISKNO} ${iops_disks} ${mean_lat_disks_usec} ${p90_lat_disks_usec} ${p99_lat_disks_usec} \
	${p99_99_lat_disks_usec} ${stdev_disks_usec} ${mean_slat_disks_usec} ${mean_clat_disks_usec} ${bw} \
	>> $result_file
# Put the host back into its previous state.

# The saved governor is restored only when --cpu-frequency was given.
# NOTE(review): the performance governor applied earlier when the option is
# absent is left in place - confirm that this is intended.
if [[ -n "$CPUFREQ" ]]; then
	cpupower frequency-set -g $cpu_governor
fi

case "$PLUGIN" in
	kernel-io-uring | *xnvme*)
		# Reload the nvme driver so that other test runs are not affected
		modprobe -rv nvme
		modprobe nvme
		wait_for_nvme_reload $DISK_NAMES
		# Write back the queue parameters saved under $backup_dir.
		for disk in $DISK_NAMES; do
			echo "INFO: Restoring device parameters for $disk"
			sysfs=/sys/block/$disk/queue
			for param in iostats rq_affinity nomerges io_poll_delay; do
				cat $backup_dir/$disk/$param > $sysfs/$param
			done
		done
		;;
esac

# Remove the generated configuration files.
rm -f $testdir/bdev.conf $testdir/config.fio