# Spdk/test/nvme/perf/common.sh
#
# 450 lines
# 12 KiB
# Bash
# Raw Normal View History
#
#!/usr/bin/env bash
# Launch a short-lived bdev_svc app with the given SPDK configuration and
# print the JSON array of discovered bdevs (bdev_get_bdevs RPC output) on
# stdout, then shut the app down and clean up.
# Arguments:
#   $1 - SPDK repository root directory
#   $2 - configuration file to load
#   $3 - config-type flag forwarded to bdev_svc (e.g. "--json");
#        defaults to "-c" when empty
#   $4 - seconds to wait for bdev_svc to come up (default 30)
# Returns: 1 when the configuration file does not exist.
function discover_bdevs() {
local rootdir=$1
local config_file=$2
local cfg_type=$3
local wait_for_spdk_bdev=${4:-30}
# Dedicated RPC socket so we do not clash with any other running SPDK app.
local rpc_server=/var/tmp/spdk-discover-bdevs.sock
if [ ! -e $config_file ]; then
echo "Invalid Configuration File: $config_file"
return 1
fi
if [ -z $cfg_type ]; then
cfg_type="-c"
fi
# Start the bdev service to query for the list of available
# bdevs.
$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 \
$cfg_type $config_file &> /dev/null &
# NOTE(review): stubpid is not declared "local", so it leaks into the
# caller's scope - confirm nothing depends on that before changing.
stubpid=$!
# bdev_svc creates /var/run/spdk_bdev0 once its initialization completes;
# poll for it before issuing any RPC.
while ! [ -e /var/run/spdk_bdev0 ]; do
# If this counter drops to zero, errexit will be caught to abort the test
((wait_for_spdk_bdev--))
sleep 1
done
# Get all of the bdevs
$rootdir/scripts/rpc.py -s "$rpc_server" bdev_get_bdevs
# Shut down the bdev service
kill $stubpid
wait $stubpid
rm -f /var/run/spdk_bdev0
}
# Generate $testdir/bdev.conf: an SPDK JSON configuration with an optional
# bdev_set_options section plus one bdev_nvme_attach_controller entry
# ("NvmeN") per PCI address listed in $DISKCFG.
# Globals: DISKCFG (read) - one PCI BDF per line, "#" lines are comments;
#          testdir (read) - output directory for bdev.conf.
# Arguments:
#   $1 - bdev_io_cache_size value (optional; section omitted when empty)
#   $2 - bdev_io_pool_size value (optional; section omitted when empty)
function create_spdk_bdev_conf() {
local output
local disk_cfg
local bdev_io_cache_size=$1
local bdev_io_pool_size=$2
local bdev_json_cfg=()
local bdev_opts=()
# Strip comment lines from the disk config file.
disk_cfg=($(grep -vP "^\s*#" "$DISKCFG"))
if [[ -n "$bdev_io_cache_size" ]]; then
bdev_opts+=("\"bdev_io_cache_size\": $bdev_io_cache_size")
fi
if [[ -n "$bdev_io_pool_size" ]]; then
bdev_opts+=("\"bdev_io_pool_size\": $bdev_io_pool_size")
fi
# With IFS="," the ${array[*]} expansions below join elements with commas,
# producing valid JSON lists.
local IFS=","
if [[ ${#bdev_opts[@]} -gt 0 ]]; then
bdev_json_cfg+=("$(
cat <<- JSON
{
"method": "bdev_set_options",
"params": {
${bdev_opts[*]}
}
}
JSON
)")
fi
for i in "${!disk_cfg[@]}"; do
bdev_json_cfg+=("$(
cat <<- JSON
{
"method": "bdev_nvme_attach_controller",
"params": {
"trtype": "PCIe",
"name":"Nvme${i}",
"traddr":"${disk_cfg[i]}"
}
}
JSON
)")
done
# NOTE(review): redundant - IFS is already "," from the declaration above.
local IFS=","
# jq pretty-prints (and validates) the assembled JSON document.
jq -r '.' <<- JSON > $testdir/bdev.conf
{
"subsystems": [
{
"subsystem": "bdev",
"config": [
${bdev_json_cfg[*]}
]
}
]
}
JSON
}
# Check whether the block device backing a PCI BDF address has no mounted
# filesystems.
# Arguments:
#   $1 - PCI BDF address (e.g. "0000:5e:00.0")
# Returns: 0 ("true") when the device has no mountpoints; otherwise the
#          number of mountpoints found (non-zero, i.e. "false").
function is_bdf_not_mounted() {
	local bdf=$1
	local blkname
	local mountpoints
	local dev
	# Resolve the kernel block device name by inspecting the /sys/block
	# symlink targets (which contain the PCI path) instead of parsing
	# "ls -l" output, which is fragile.
	for dev in /sys/block/*; do
		if [[ "$(readlink -f "$dev")" == *"$bdf"* ]]; then
			blkname=${dev##*/}
			break
		fi
	done
	mountpoints=$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)
	return "$mountpoints"
}
# Print each CPU from a comma-separated list on its own line.
# Arguments:
#   $1 - comma-separated CPU list (e.g. "0,1,2")
function get_cores() {
	local core_list="$1"
	local core
	# Turn commas into whitespace so the for-loop splits on each entry.
	for core in ${core_list//,/ }; do
		printf '%s\n' "$core"
	done
}
# For each core number in the list, print the NUMA node it belongs to,
# one per line, preserving input order.
# Arguments:
#   $1 - whitespace-separated list of CPU core numbers
function get_cores_numa_node() {
local cores=$1
for core in $cores; do
# "lscpu -p=cpu,node" emits "cpu,node" CSV lines; anchor the core number
# at line start and print its node column.
lscpu -p=cpu,node | grep "^$core\b" | awk -F ',' '{print $2}'
done
}
# Print the NUMA node of each given disk, one per line. How a "disk" is
# identified depends on the plugin:
#   *nvme* - disks are PCI BDF addresses; only devices bound to vfio-pci
#            or uio_pci_generic (i.e. prepared by setup.sh) are reported
#   *bdev* - disks are SPDK bdev names, mapped back to BDFs via bdev list
#   other  - disks are kernel block device names (e.g. nvme0n1)
# Globals: rootdir, testdir (read; used in the bdev branch).
# Arguments:
#   $1 - plugin name
#   $2 - whitespace-separated disk list
function get_numa_node() {
local plugin=$1
local disks=$2
if [[ "$plugin" =~ "nvme" ]]; then
for bdf in $disks; do
local driver
driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}')
# Use this check to omit blacklisted devices (not bound to a driver by the setup.sh script)
if [ "$driver" = "vfio-pci" ] || [ "$driver" = "uio_pci_generic" ]; then
cat /sys/bus/pci/devices/$bdf/numa_node
fi
done
elif [[ "$plugin" =~ "bdev" ]]; then
local bdevs
bdevs=$(discover_bdevs $rootdir $testdir/bdev.conf --json)
for name in $disks; do
local bdev_bdf
# Map the bdev name back to its PCI address via the JSON bdev list.
bdev_bdf=$(jq -r ".[] | select(.name==\"$name\").driver_specific.nvme.pci_address" <<< $bdevs)
cat /sys/bus/pci/devices/$bdev_bdf/numa_node
done
else
for name in $disks; do
local bdf
# Not reading directly from /sys/block/nvme* because of a kernel bug
# which results in NUMA 0 always getting reported.
bdf=$(cat /sys/block/$name/device/address)
cat /sys/bus/pci/devices/$bdf/numa_node
done
fi
}
# Print the list of test disks appropriate for the given plugin:
#   *nvme* - PCI BDF addresses straight from $DISKCFG
#   *bdev* - generated SPDK bdev names (Nvme0n1 .. Nvme<N-1>n1)
#   other  - kernel block device names for the configured BDFs, skipping
#            devices that currently have mounted filesystems
# Globals: DISKCFG (read).
# Arguments:
#   $1 - plugin name
function get_disks() {
local plugin=$1
local disk_cfg
disk_cfg=($(grep -vP "^\s*#" "$DISKCFG"))
if [[ "$plugin" =~ "nvme" ]]; then
# PCI BDF address is enough for nvme-perf and nvme-fio-plugin,
# so just print them from configuration file
echo "${disk_cfg[*]}"
elif [[ "$plugin" =~ "bdev" ]]; then
# Generate NvmeXn1 bdev name configuration file for bdev-perf
# and bdev-fio-plugin
local bdevs
local disk_no
disk_no=${#disk_cfg[@]}
# eval is needed so brace expansion sees the computed upper bound.
eval echo "Nvme{0..$((disk_no - 1))}n1"
else
# Find nvme block devices and only use the ones which
# are not mounted
for bdf in "${disk_cfg[@]}"; do
if is_bdf_not_mounted $bdf; then
local blkname
blkname=$(ls -l /sys/block/ | grep $bdf | awk '{print $9}')
echo $blkname
fi
done
fi
}
# Count how many of the given disks live on a particular NUMA node.
# Arguments:
#   $1 - whitespace-separated disk list
#   $2 - whitespace-separated NUMA node list, parallel to $1
#   $3 - NUMA node to count
# Outputs: the count (always a number; "0" for an empty disk list, where
#          the previous version echoed an empty string).
function get_disks_on_numa() {
	local devs=($1)
	local numas=($2)
	local numa_no=$3
	# Initialize to 0, not "" - the old empty-string init only worked
	# because bash arithmetic treats an empty variable as zero.
	local disks_on_numa=0
	local i
	for ((i = 0; i < ${#devs[@]}; i++)); do
		if [[ "${numas[i]}" == "$numa_no" ]]; then
			disks_on_numa=$((disks_on_numa + 1))
		fi
	done
	echo "$disks_on_numa"
}
# Append per-disk/per-core job sections to $testdir/config.fio, assigning
# disks to CPU cores with NUMA locality in mind.
# Globals: testdir (read); appends to $testdir/config.fio.
# Arguments:
#   $1 - number of disks to use
#   $2 - plugin name (kernel* / spdk-plugin-nvme / spdk-plugin-bdev / ...)
#   $3 - whitespace-separated disk list (BDFs, bdev names or block devices)
#   $4 - whitespace-separated NUMA node list, parallel to $3
#   $5 - whitespace-separated CPU core list
function create_fio_config() {
local disk_no=$1
local plugin=$2
local disks=($3)
local disks_numa=($4)
local cores=($5)
local total_disks=${#disks[@]}
local no_cores=${#cores[@]}
local filename=""
local cores_numa
cores_numa=($(get_cores_numa_node "$5"))
# Spread disks as evenly as possible over cores; the first
# (disk_no % no_cores) cores each take one extra disk.
local disks_per_core=$((disk_no / no_cores))
local disks_per_core_mod=$((disk_no % no_cores))
# For kernel driver, each disk will be aligned with all cpus on the same NUMA node
if [[ "$plugin" =~ "kernel" ]]; then
for ((i = 0; i < disk_no; i++)); do
sed -i -e "\$a[filename${i}]" $testdir/config.fio
filename="/dev/${disks[$i]}"
sed -i -e "\$afilename=$filename" $testdir/config.fio
cpu_used=""
for ((j = 0; j < no_cores; j++)); do
core_numa=${cores_numa[$j]}
if [ "${disks_numa[$i]}" = "$core_numa" ]; then
cpu_used+="${cores[$j]},"
fi
done
sed -i -e "\$acpus_allowed=$cpu_used" $testdir/config.fio
echo "" >> $testdir/config.fio
done
else
# One fio job section per core; each section receives its share of
# disks, preferring disks on the same NUMA node as the core.
for ((i = 0; i < no_cores; i++)); do
core_numa=${cores_numa[$i]}
total_disks_per_core=$disks_per_core
if [ "$disks_per_core_mod" -gt "0" ]; then
total_disks_per_core=$((disks_per_core + 1))
disks_per_core_mod=$((disks_per_core_mod - 1))
fi
# No disks left for this core means no further sections are needed.
if [ "$total_disks_per_core" = "0" ]; then
break
fi
sed -i -e "\$a[filename${i}]" $testdir/config.fio
#use cpus_allowed as cpumask works only for cores 1-32
sed -i -e "\$acpus_allowed=${cores[$i]}" $testdir/config.fio
m=0 #counter of disks per cpu core numa
n=0 #counter of all disks
while [ "$m" -lt "$total_disks_per_core" ]; do
if [ ${disks_numa[$n]} = $core_numa ]; then
m=$((m + 1))
if [[ "$plugin" = "spdk-plugin-nvme" ]]; then
filename='trtype=PCIe traddr='${disks[$n]//:/.}' ns=1'
elif [[ "$plugin" = "spdk-plugin-bdev" ]]; then
filename=${disks[$n]}
fi
sed -i -e "\$afilename=$filename" $testdir/config.fio
#Mark numa of n'th disk as "x" to mark it as claimed
disks_numa[$n]="x"
fi
n=$((n + 1))
# If there is no more disks with numa node same as cpu numa node, switch to other numa node.
# NOTE(review): assumes a two-node (0/1) system - confirm on larger boxes.
if [ $n -ge $total_disks ]; then
if [ "$core_numa" = "1" ]; then
core_numa=0
else
core_numa=1
fi
n=0
fi
done
echo "" >> $testdir/config.fio
done
fi
}
# Precondition all target NVMe drives: write the full capacity twice with
# 1M sequential writes so the drives reach steady state before measuring.
# Globals: rootdir, testdir (read); creates and removes $testdir/config.fio.
function preconditioning() {
local dev_name=""
local filename=""
local nvme_list
HUGEMEM=8192 $rootdir/scripts/setup.sh
cp $testdir/config.fio.tmp $testdir/config.fio
echo "[Preconditioning]" >> $testdir/config.fio
# Generate filename argument for FIO.
# We only want to target NVMes not bound to nvme driver.
# If they're still bound to nvme that means they were skipped by
# setup.sh on purpose.
nvme_list=$(get_disks nvme)
for nvme in $nvme_list; do
# fio's filename list is ":"-separated; a trailing ":" is accepted.
dev_name='trtype=PCIe traddr='${nvme//:/.}' ns=1'
filename+=$(printf %s":" "$dev_name")
done
echo "** Preconditioning disks, this can take a while, depending on the size of disks."
run_spdk_nvme_fio "spdk-plugin-nvme" --filename="$filename" --size=100% --loops=2 --bs=1M \
--rw=write --iodepth=32 --output-format=normal
rm -f $testdir/config.fio
}
# Extract a single metric from the fio JSON results in $TMP_RESULT_FILE,
# weighting read/write statistics by the workload's R/W mix.
# Globals: TMP_RESULT_FILE (read).
# Arguments:
#   $1 - metric: iops | mean_lat_usec | p99_lat_usec | p99_99_lat_usec
#        | stdev_usec | mean_slat_usec | mean_clat_usec | bw_Kibs
#   $2 - read percentage of the workload (0-100)
# Outputs: the metric as an integer (latencies converted from ns to usec).
function get_results() {
	local reads_pct
	local writes_pct
	reads_pct=$(bc -l <<< "scale=3; $2/100")
	writes_pct=$(bc -l <<< "scale=3; 1-$reads_pct")
	case "$1" in
		iops)
			iops=$(jq -r '.jobs[] | .read.iops + .write.iops' $TMP_RESULT_FILE)
			iops=${iops%.*}
			echo $iops
			;;
		mean_lat_usec)
			mean_lat=$(jq -r ".jobs[] | (.read.lat_ns.mean * $reads_pct + .write.lat_ns.mean * $writes_pct)" $TMP_RESULT_FILE)
			mean_lat=${mean_lat%.*}
			echo $((mean_lat / 1000))
			;;
		p99_lat_usec)
			# Parenthesize the "// 0" fallbacks: jq's "//" binds looser than
			# "*" and "+", so without parens the read percentile was returned
			# unweighted whenever it was present and the mix weighting never
			# applied.
			p99_lat=$(jq -r ".jobs[] | ((.read.clat_ns.percentile.\"99.000000\" // 0) * $reads_pct + (.write.clat_ns.percentile.\"99.000000\" // 0) * $writes_pct)" $TMP_RESULT_FILE)
			p99_lat=${p99_lat%.*}
			echo $((p99_lat / 1000))
			;;
		p99_99_lat_usec)
			# Same precedence fix as p99_lat_usec above.
			p99_99_lat=$(jq -r ".jobs[] | ((.read.clat_ns.percentile.\"99.990000\" // 0) * $reads_pct + (.write.clat_ns.percentile.\"99.990000\" // 0) * $writes_pct)" $TMP_RESULT_FILE)
			p99_99_lat=${p99_99_lat%.*}
			echo $((p99_99_lat / 1000))
			;;
		stdev_usec)
			stdev=$(jq -r ".jobs[] | (.read.clat_ns.stddev * $reads_pct + .write.clat_ns.stddev * $writes_pct)" $TMP_RESULT_FILE)
			stdev=${stdev%.*}
			echo $((stdev / 1000))
			;;
		mean_slat_usec)
			mean_slat=$(jq -r ".jobs[] | (.read.slat_ns.mean * $reads_pct + .write.slat_ns.mean * $writes_pct)" $TMP_RESULT_FILE)
			mean_slat=${mean_slat%.*}
			echo $((mean_slat / 1000))
			;;
		mean_clat_usec)
			mean_clat=$(jq -r ".jobs[] | (.read.clat_ns.mean * $reads_pct + .write.clat_ns.mean * $writes_pct)" $TMP_RESULT_FILE)
			mean_clat=${mean_clat%.*}
			echo $((mean_clat / 1000))
			;;
		bw_Kibs)
			bw=$(jq -r ".jobs[] | (.read.bw + .write.bw)" $TMP_RESULT_FILE)
			bw=${bw%.*}
			echo $((bw))
			;;
	esac
}
# Parse the "Total" summary line in $TMP_RESULT_FILE (bdevperf output) and
# print the requested metric as an integer:
#   iops    - total I/Os per second
#   bw_Kibs - total bandwidth, converted from MiB/s to KiB/s
# Globals: TMP_RESULT_FILE (read).
function get_bdevperf_results() {
	local metric=$1
	local total
	case "$metric" in
		iops)
			# Second whitespace field after the "Total" marker.
			total=$(grep Total $TMP_RESULT_FILE | awk -F 'Total' '{print $2}' | awk '{print $2}')
			total=${total%.*}
			echo $total
			;;
		bw_Kibs)
			# Fourth whitespace field after the "Total" marker, in MiB/s.
			total=$(grep Total $TMP_RESULT_FILE | awk -F 'Total' '{print $2}' | awk '{print $4}')
			total=${total%.*}
			echo $((total * 1024))
			;;
	esac
}
# Parse the "Total :" summary line produced by SPDK's nvme perf tool from
# $TMP_RESULT_FILE and print, on one line:
#   "<iops> <bw_KiBs> <mean_lat_usec> <min_lat_usec> <max_lat_usec>"
# Globals: TMP_RESULT_FILE (read).
function get_nvmeperf_results() {
local iops
local bw_MBs
local mean_lat_usec
local max_lat_usec
local min_lat_usec
# "tr -s" squeezes repeated spaces so the columns after "Total : " split
# cleanly into the five fields read below.
read -r iops bw_MBs mean_lat_usec min_lat_usec max_lat_usec <<< $(tr -s " " < $TMP_RESULT_FILE | grep -oP "(?<=Total : )(.*+)")
# We need to get rid of the decimal spaces due
# to use of arithmetic expressions instead of "bc" for calculations
iops=${iops%.*}
bw_MBs=${bw_MBs%.*}
mean_lat_usec=${mean_lat_usec%.*}
min_lat_usec=${min_lat_usec%.*}
max_lat_usec=${max_lat_usec%.*}
# bc converts MiB/s to KiB/s here.
echo "$iops $(bc <<< "$bw_MBs * 1024") $mean_lat_usec $min_lat_usec $max_lat_usec"
}
# Run fio with one of the SPDK fio plugins (engine injected via LD_PRELOAD).
# Globals: plugin_dir, FIO_BIN, testdir (read).
# Arguments:
#   $1   - plugin: "spdk-plugin-nvme" or "spdk-plugin-bdev"
#   $2.. - extra arguments appended to the fio command line
function run_spdk_nvme_fio() {
local plugin=$1
echo "** Running fio test, this can take a while, depending on the run-time and ramp-time setting."
if [[ "$plugin" = "spdk-plugin-nvme" ]]; then
LD_PRELOAD=$plugin_dir/spdk_nvme $FIO_BIN $testdir/config.fio --output-format=json "${@:2}" --ioengine=spdk
elif [[ "$plugin" = "spdk-plugin-bdev" ]]; then
LD_PRELOAD=$plugin_dir/spdk_bdev $FIO_BIN $testdir/config.fio --output-format=json "${@:2}" --ioengine=spdk_bdev --spdk_json_conf=$testdir/bdev.conf --spdk_mem=4096
fi
# Brief pause so the devices settle before the next test stage.
sleep 1
}
# Run fio against kernel block devices (no SPDK plugin).
# Globals: FIO_BIN, testdir (read).
# Arguments: all arguments are appended to the fio command line.
function run_nvme_fio() {
echo "** Running fio test, this can take a while, depending on the run-time and ramp-time setting."
$FIO_BIN $testdir/config.fio --output-format=json "$@"
# Brief pause so the devices settle before the next test stage.
sleep 1
}
# Run the SPDK bdevperf benchmark against the generated bdev.conf.
# Globals: bdevperf_dir, testdir, IODEPTH, BLK_SIZE, RW, MIX, RUNTIME,
#          CPUS_ALLOWED (all read).
function run_bdevperf() {
echo "** Running bdevperf test, this can take a while, depending on the run-time setting."
$bdevperf_dir/bdevperf --json $testdir/bdev.conf -q $IODEPTH -o $BLK_SIZE -w $RW -M $MIX -t $RUNTIME -m "[$CPUS_ALLOWED]"
# Brief pause so the devices settle before the next test stage.
sleep 1
}
# Run SPDK's nvme perf tool against the first $1 configured NVMe devices.
# Globals: nvmeperf_dir, SHELL, IODEPTH, BLK_SIZE, RW, MIX, RUNTIME,
#          CPUS_ALLOWED (all read).
# Arguments:
#   $1 - maximum number of disks to target
function run_nvmeperf() {
# Prepare -r argument string for nvme perf command
local r_opt
local disks
# Limit the number of disks to $1 if needed
disks=($(get_disks nvme))
disks=("${disks[@]:0:$1}")
# One quoted ' -r "trtype:PCIe traddr:<bdf>"' per disk.
r_opt=$(printf -- ' -r "trtype:PCIe traddr:%s"' "${disks[@]}")
echo "** Running nvme perf test, this can take a while, depending on the run-time setting."
# Run command in separate shell as this solves quoting issues related to r_opt var
$SHELL -c "$nvmeperf_dir/perf $r_opt -q $IODEPTH -o $BLK_SIZE -w $RW -M $MIX -t $RUNTIME -c [$CPUS_ALLOWED]"
# Brief pause so the devices settle before the next test stage.
sleep 1
}
# Block until the kernel nvme driver has fully (re)loaded the given disks.
# After a driver rebind the per-queue sysfs attributes appear slightly
# later than the block device node itself, so poll until they exist.
# Arguments:
#   $1 - space-separated list of block device names (e.g. "nvme0n1 nvme1n1")
function wait_for_nvme_reload() {
	local nvmes=$1
	local disk
	local cmd
	shopt -s extglob
	for disk in $nvmes; do
		cmd="ls /sys/block/$disk/queue/*@(iostats|rq_affinity|nomerges|io_poll_delay)*"
		until $cmd 2> /dev/null; do
			echo "Waiting for full nvme driver reload..."
			sleep 0.5
		done
	done
	# Switch extglob back off when done. The previous "shopt -q extglob"
	# only *queried* the option's state and never restored it.
	shopt -u extglob
}
# Validate $DISKNO against the number of devices detected for $PLUGIN.
# When DISKNO is "ALL"/"all" it is replaced with the detected disk count.
# Relies on errexit: the trailing "false" aborts the run on invalid input.
# Globals: PLUGIN (read), DISKNO (read/write), disks (written, not local -
#          presumably consumed by callers; confirm before localizing).
function verify_disk_number() {
# Check if we have appropriate number of disks to carry out the test
disks=($(get_disks $PLUGIN))
if [[ $DISKNO == "ALL" ]] || [[ $DISKNO == "all" ]]; then
DISKNO=${#disks[@]}
# NOTE(review): the -gt test runs before the regex check, so a
# non-numeric DISKNO makes [[ ]] error out (caught by errexit) before
# this message can be printed - confirm whether that ordering is intended.
elif [[ $DISKNO -gt ${#disks[@]} ]] || [[ ! $DISKNO =~ ^[0-9]+$ ]]; then
echo "error: Required devices number ($DISKNO) is not a valid number or it's larger than the number of devices found (${#disks[@]})"
false
fi
}