#!/usr/bin/env bash
function discover_bdevs() {
	local rootdir=$1
	local config_file=$2
	local cfg_type=$3
	local wait_for_spdk_bdev=${4:-30}
	local rpc_server=/var/tmp/spdk-discover-bdevs.sock

	if [[ ! -e $config_file ]]; then
		echo "Invalid Configuration File: $config_file"
		return 1
	fi

	if [[ -z $cfg_type ]]; then
		cfg_type="-c"
	fi

	# Start the bdev service to query for the list of available
	# bdevs.
	$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 \
		$cfg_type $config_file &> /dev/null &
	stubpid=$!
	while [[ ! -e /var/run/spdk_bdev0 ]]; do
		# When this counter reaches zero the decrement evaluates to 0, the
		# arithmetic command returns non-zero and errexit aborts the test
		((wait_for_spdk_bdev--))
		sleep 1
	done

	# Get all of the bdevs
	$rootdir/scripts/rpc.py -s "$rpc_server" bdev_get_bdevs

	# Shut down the bdev service
	kill $stubpid
	wait $stubpid
	rm -f /var/run/spdk_bdev0
}
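
# Example usage of discover_bdevs (a sketch; assumes $rootdir points at an SPDK
# source tree and create_spdk_bdev_conf has already written $testdir/bdev.conf):
#   bdevs=$(discover_bdevs "$rootdir" "$testdir/bdev.conf" --json)
#   jq -r '.[].name' <<< "$bdevs" # prints discovered bdev names, e.g. Nvme0n1
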
function create_spdk_bdev_conf() {
	local output
	local disk_cfg
	local bdev_io_cache_size=$1
	local bdev_io_pool_size=$2
	local bdev_json_cfg=()
	local bdev_opts=()

	disk_cfg=($(grep -vP "^\s*#" "$DISKCFG"))

	if [[ -n "$bdev_io_cache_size" ]]; then
		bdev_opts+=("\"bdev_io_cache_size\": $bdev_io_cache_size")
	fi

	if [[ -n "$bdev_io_pool_size" ]]; then
		bdev_opts+=("\"bdev_io_pool_size\": $bdev_io_pool_size")
	fi

	# Join array elements with "," whenever they are expanded with [*] below
	local IFS=","
	if [[ ${#bdev_opts[@]} -gt 0 ]]; then
		bdev_json_cfg+=("$(
			cat <<- JSON
				{
					"method": "bdev_set_options",
					"params": {
						${bdev_opts[*]}
					}
				}
			JSON
		)")
	fi

	for i in "${!disk_cfg[@]}"; do
		bdev_json_cfg+=("$(
			cat <<- JSON
				{
					"method": "bdev_nvme_attach_controller",
					"params": {
						"trtype": "PCIe",
						"name": "Nvme${i}",
						"traddr": "${disk_cfg[i]}"
					}
				}
			JSON
		)")
	done

	jq -r '.' <<- JSON > $testdir/bdev.conf
		{
			"subsystems": [
				{
					"subsystem": "bdev",
					"config": [
						${bdev_json_cfg[*]}
					]
				}
			]
		}
	JSON
}
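
# A sketch of the $testdir/bdev.conf this produces for a single disk entry
# (the PCIe address 0000:1a:00.0 is a hypothetical example):
#   {
#     "subsystems": [
#       {
#         "subsystem": "bdev",
#         "config": [
#           {
#             "method": "bdev_nvme_attach_controller",
#             "params": {
#               "trtype": "PCIe",
#               "name": "Nvme0",
#               "traddr": "0000:1a:00.0"
#             }
#           }
#         ]
#       }
#     ]
#   }
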
function is_bdf_not_mounted() {
	local bdf=$1
	local blkname
	local mountpoints

	blkname=$(ls -l /sys/block/ | grep $bdf | awk '{print $9}')
	mountpoints=$(lsblk /dev/$blkname --output MOUNTPOINT -n | wc -w)

	# 0 mountpoints means exit status 0 ("success"), so the function name
	# reads naturally when used as a condition
	return $mountpoints
}
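
# Example usage of is_bdf_not_mounted (a sketch; the BDF is hypothetical and
# must belong to a device bound to the kernel nvme driver):
#   if is_bdf_not_mounted "0000:1a:00.0"; then
#       echo "0000:1a:00.0 has no mounted partitions; safe to test"
#   fi
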
function get_cores() {
	local cpu_list="$1"
	for cpu in ${cpu_list//,/ }; do
		echo $cpu
	done
}
function get_cores_numa_node() {
	local cores=$1
	for core in $cores; do
		lscpu -p=cpu,node | grep "^$core\b" | awk -F ',' '{print $2}'
	done
}
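
# Example usage of get_cores_numa_node (a sketch; the node numbers depend on
# the host topology):
#   get_cores_numa_node "$(get_cores "0,1,24")"
#   # prints one NUMA node per core, e.g.:
#   # 0
#   # 0
#   # 1
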
function get_numa_node() {
	local plugin=$1
	local disks=$2

	if [[ "$plugin" =~ "nvme" ]]; then
		for bdf in $disks; do
			local driver
			driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}')
			# Use this check to omit blacklisted devices (ones not bound to
			# a userspace driver by the setup.sh script)
			if [[ "$driver" == "vfio-pci" || "$driver" == "uio_pci_generic" ]]; then
				cat /sys/bus/pci/devices/$bdf/numa_node
			fi
		done
	elif [[ "$plugin" =~ "bdev" ]]; then
		local bdevs
		bdevs=$(discover_bdevs $rootdir $testdir/bdev.conf --json)
		for name in $disks; do
			local bdev_bdf
			bdev_bdf=$(jq -r ".[] | select(.name==\"$name\").driver_specific.nvme.pci_address" <<< $bdevs)
			cat /sys/bus/pci/devices/$bdev_bdf/numa_node
		done
	else
		for name in $disks; do
			local bdf
			# Not reading directly from /sys/block/nvme* because of a kernel bug
			# which results in NUMA 0 always getting reported.
			bdf=$(cat /sys/block/$name/device/address)
			cat /sys/bus/pci/devices/$bdf/numa_node
		done
	fi
}
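
# Example usage of get_numa_node (a sketch; the disk identifiers depend on the
# plugin - PCIe BDFs for "nvme", NvmeXn1 bdev names for "bdev", nvmeXn1 block
# devices for kernel modes):
#   disks_numa=($(get_numa_node "kernel-classic-polling" "nvme0n1 nvme1n1"))
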
function get_disks() {
	local plugin=$1
	local disk_cfg

	disk_cfg=($(grep -vP "^\s*#" "$DISKCFG"))

	if [[ "$plugin" =~ "nvme" ]]; then
		# PCI BDF address is enough for nvme-perf and nvme-fio-plugin,
		# so just print them from the configuration file
		echo "${disk_cfg[*]}"
	elif [[ "$plugin" =~ "bdev" ]]; then
		# Generate NvmeXn1 bdev names for bdev-perf and bdev-fio-plugin
		local bdevs
		local disk_no
		disk_no=${#disk_cfg[@]}
		eval echo "Nvme{0..$((disk_no - 1))}n1"
	else
		# Find nvme block devices and only use the ones which
		# are not mounted
		for bdf in "${disk_cfg[@]}"; do
			if is_bdf_not_mounted $bdf; then
				local blkname
				blkname=$(ls -l /sys/block/ | grep $bdf | awk '{print $9}')
				echo $blkname
			fi
		done
	fi
}
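
# Example usage of get_disks (a sketch; actual output depends on $DISKCFG):
#   get_disks spdk-plugin-nvme # space-separated PCIe BDFs from $DISKCFG
#   get_disks spdk-plugin-bdev # "Nvme0n1 Nvme1n1 ..." via brace expansion
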
function get_disks_on_numa() {
	local devs=($1)
	local numas=($2)
	local numa_no=$3
	local disks_on_numa=0
	local i

	for ((i = 0; i < ${#devs[@]}; i++)); do
		if [[ ${numas[$i]} == $numa_no ]]; then
			disks_on_numa=$((disks_on_numa + 1))
		fi
	done
	echo $disks_on_numa
}
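
# Builds $testdir/config.fio. The same job/filename layout is generated for
# kernel and SPDK fio modes so that results stay comparable. The iodepth is
# calculated per [filename] job section, so each "filename" gets the effective
# queue depth requested for the run, and NUMA node information is appended as
# comments to the "cpus_allowed" and "filename" entries. A warning is printed
# whenever a cross-NUMA assignment is unavoidable. For example, 7 NVMe drives,
# --cpu-allowed=0,1,2 and --iodepth=32 produce sections along these lines:
#   [filename0]
#   iodepth=96
#   cpus_allowed=0 #CPU NUMA Node X
#   filename=/dev/nvme0n1 #NVMe NUMA Node X
#   filename=/dev/nvme1n1 #NVMe NUMA Node X
#   filename=/dev/nvme2n1 #NVMe NUMA Node X
#
#   [filename1]
#   iodepth=64
#   cpus_allowed=1 #CPU NUMA Node X
#   filename=/dev/nvme3n1 #NVMe NUMA Node X
#   filename=/dev/nvme4n1 #NVMe NUMA Node X
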
function create_fio_config() {
	local disk_no=$1
	local plugin=$2
	local disks=($3)
	local disks_numa=($4)
	local cores=($5)
	local total_disks=${#disks[@]}
	local fio_job_section=()
	local num_cores=${#cores[@]}
	local disks_per_core=$((disk_no / num_cores))
	local disks_per_core_mod=$((disk_no % num_cores))
	local cores_numa
	cores_numa=($(get_cores_numa_node "${cores[*]}"))

	# The following part of this function still leverages global variables a lot.
	# It's a mix of local variables passed as arguments to the function and
	# global variables, which is messy.
	# TODO: Modify this to be consistent with how variables are used here. Aim for
	# using only local variables to get rid of globals as much as possible.
	desc="\"Test io_plugin=$PLUGIN Blocksize=${BLK_SIZE} Workload=$RW MIX=${MIX} qd=${IODEPTH}\""

	cp "$testdir/config.fio.tmp" "$testdir/config.fio"
	cat <<- EOF >> $testdir/config.fio
		description=$desc
		rw=$RW
		rwmixread=$MIX
		bs=$BLK_SIZE
		runtime=$RUNTIME
		ramp_time=$RAMP_TIME
		numjobs=$NUMJOBS
		log_avg_msec=$SAMPLING_INT
	EOF

	if $GTOD_REDUCE; then
		echo "gtod_reduce=1" >> $testdir/config.fio
	fi
for i in "${!cores[@]}"; do
local m=0 #Counter of disks per NUMA node
local n=0 #Counter of all disks in test
core_numa=${cores_numa[$i]}
total_disks_per_core=$disks_per_core
# Check how many "stray" disks are unassigned to CPU cores
# Assign one disk to current CPU core and substract it from the total of
# unassigned disks
if [[ "$disks_per_core_mod" -gt "0" ]]; then
total_disks_per_core=$((disks_per_core + 1))
disks_per_core_mod=$((disks_per_core_mod - 1))
fi
# SPDK fio plugin supports submitting/completing I/Os to multiple SSDs from a single thread.
# Therefore, the per thread queue depth is set to the desired IODEPTH/device X the number of devices per thread.
QD=$IODEPTH
if [[ "$NOIOSCALING" = false ]]; then
QD=$((IODEPTH * total_disks_per_core))
fi
fio_job_section+=("")
fio_job_section+=("[filename${i}]")
fio_job_section+=("iodepth=$QD")
fio_job_section+=("cpus_allowed=${cores[$i]} #CPU NUMA Node ${cores_numa[$i]}")
while [[ "$m" -lt "$total_disks_per_core" ]]; do
# Try to add disks to job section if it's NUMA node matches NUMA
# for currently selected CPU
if [[ "${disks_numa[$n]}" == "$core_numa" ]]; then
if [[ "$plugin" == "spdk-plugin-nvme" ]]; then
fio_job_section+=("filename=trtype=PCIe traddr=${disks[$n]//:/.} ns=1 #NVMe NUMA Node ${disks_numa[$n]}")
elif [[ "$plugin" == "spdk-plugin-bdev" ]]; then
fio_job_section+=("filename=${disks[$n]} #NVMe NUMA Node ${disks_numa[$n]}")
elif [[ "$plugin" =~ "kernel" ]]; then
fio_job_section+=("filename=/dev/${disks[$n]} #NVMe NUMA Node ${disks_numa[$n]}")
fi
				m=$((m + 1))
				# Mark the NUMA node of the n-th disk as "x" to claim the disk
				# for subsequent loop iterations
				disks_numa[$n]="x"
			fi
			n=$((n + 1))

			# If there are no more disks on the same NUMA node as the CPU,
			# switch to the other NUMA node, go back to the start of the loop
			# and try again.
			if [[ $n -ge $total_disks ]]; then
				echo "WARNING! Cannot assign any more NVMes for CPU ${cores[$i]}"
				echo "NVMe assignment for this CPU will be cross-NUMA."
				if [[ "$core_numa" == "1" ]]; then
					core_numa=0
				else
					core_numa=1
				fi
				n=0
			fi
		done
	done
printf "%s\n" "${fio_job_section[@]}" >> $testdir/config.fio
echo "INFO: Generated fio configuration file:"
cat $testdir/config.fio
}
function preconditioning() {
	local dev_name=""
	local filename=""
	local nvme_list

	HUGEMEM=8192 $rootdir/scripts/setup.sh
	cp $testdir/config.fio.tmp $testdir/config.fio
	echo "[Preconditioning]" >> $testdir/config.fio

	# Generate filename argument for FIO.
	# We only want to target NVMes not bound to the kernel nvme driver.
	# If they're still bound to nvme that means they were skipped by
	# setup.sh on purpose.
	nvme_list=$(get_disks nvme)
	for nvme in $nvme_list; do
		dev_name='trtype=PCIe traddr='${nvme//:/.}' ns=1'
		filename+=$(printf %s":" "$dev_name")
	done
	echo "** Preconditioning disks, this can take a while, depending on the size of disks."
	run_spdk_nvme_fio "spdk-plugin-nvme" --filename="$filename" --size=100% --loops=2 --bs=1M \
		--rw=write --iodepth=32 --output-format=normal
	rm -f $testdir/config.fio
}
function get_results() {
	local reads_pct
	local writes_pct

	reads_pct=$(bc -l <<< "scale=3; $2/100")
	writes_pct=$(bc -l <<< "scale=3; 1-$reads_pct")
	case "$1" in
		iops)
			iops=$(jq -r '.jobs[] | .read.iops + .write.iops' $TMP_RESULT_FILE)
			iops=${iops%.*}
			echo $iops
			;;
		mean_lat_usec)
			mean_lat=$(jq -r ".jobs[] | (.read.lat_ns.mean * $reads_pct + .write.lat_ns.mean * $writes_pct)" $TMP_RESULT_FILE)
			mean_lat=${mean_lat%.*}
			echo $((mean_lat / 1000))
			;;
		p90_lat_usec)
			# Parenthesize the "// 0" fallbacks: jq's "//" binds looser than "*",
			# so without the parentheses the percentiles would not be weighted
			p90_lat=$(jq -r ".jobs[] | ((.read.clat_ns.percentile.\"90.000000\" // 0) * $reads_pct + (.write.clat_ns.percentile.\"90.000000\" // 0) * $writes_pct)" $TMP_RESULT_FILE)
			p90_lat=${p90_lat%.*}
			echo $((p90_lat / 1000))
			;;
		p99_lat_usec)
			p99_lat=$(jq -r ".jobs[] | ((.read.clat_ns.percentile.\"99.000000\" // 0) * $reads_pct + (.write.clat_ns.percentile.\"99.000000\" // 0) * $writes_pct)" $TMP_RESULT_FILE)
			p99_lat=${p99_lat%.*}
			echo $((p99_lat / 1000))
			;;
		p99_99_lat_usec)
			p99_99_lat=$(jq -r ".jobs[] | ((.read.clat_ns.percentile.\"99.990000\" // 0) * $reads_pct + (.write.clat_ns.percentile.\"99.990000\" // 0) * $writes_pct)" $TMP_RESULT_FILE)
			p99_99_lat=${p99_99_lat%.*}
			echo $((p99_99_lat / 1000))
			;;
		stdev_usec)
			stdev=$(jq -r ".jobs[] | (.read.clat_ns.stddev * $reads_pct + .write.clat_ns.stddev * $writes_pct)" $TMP_RESULT_FILE)
			stdev=${stdev%.*}
			echo $((stdev / 1000))
			;;
		mean_slat_usec)
			mean_slat=$(jq -r ".jobs[] | (.read.slat_ns.mean * $reads_pct + .write.slat_ns.mean * $writes_pct)" $TMP_RESULT_FILE)
			mean_slat=${mean_slat%.*}
			echo $((mean_slat / 1000))
			;;
		mean_clat_usec)
			mean_clat=$(jq -r ".jobs[] | (.read.clat_ns.mean * $reads_pct + .write.clat_ns.mean * $writes_pct)" $TMP_RESULT_FILE)
			mean_clat=${mean_clat%.*}
			echo $((mean_clat / 1000))
			;;
		bw_Kibs)
			bw=$(jq -r ".jobs[] | (.read.bw + .write.bw)" $TMP_RESULT_FILE)
			bw=${bw%.*}
			echo $((bw))
			;;
	esac
}
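
# Example usage of get_results (a sketch; assumes $TMP_RESULT_FILE holds fio
# JSON output and the workload ran with rwmixread=70, i.e. 70% reads):
#   iops=$(get_results iops 70)
#   mean_lat=$(get_results mean_lat_usec 70)
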
function get_bdevperf_results() {
	case "$1" in
		iops)
			iops=$(grep Total $TMP_RESULT_FILE | awk -F 'Total' '{print $2}' | awk '{print $2}')
			iops=${iops%.*}
			echo $iops
			;;
		bw_Kibs)
			bw_MBs=$(grep Total $TMP_RESULT_FILE | awk -F 'Total' '{print $2}' | awk '{print $4}')
			bw_MBs=${bw_MBs%.*}
			echo $((bw_MBs * 1024))
			;;
	esac
}
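
# Example usage of get_bdevperf_results (a sketch; assumes $TMP_RESULT_FILE
# captured bdevperf output, whose "Total" summary line carries IOPS in the 2nd
# and MiB/s in the 4th whitespace-separated field after the "Total" keyword):
#   iops=$(get_bdevperf_results iops)
#   bw_kibs=$(get_bdevperf_results bw_Kibs)
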
function get_nvmeperf_results() {
	local iops
	local bw_MBs
	local mean_lat_usec
	local max_lat_usec
	local min_lat_usec

	read -r iops bw_MBs mean_lat_usec min_lat_usec max_lat_usec <<< $(tr -s " " < $TMP_RESULT_FILE | grep -oP "(?<=Total : )(.*+)")

	# Strip the fractional parts because the calculations below use bash
	# arithmetic expressions instead of "bc"
	iops=${iops%.*}
	bw_MBs=${bw_MBs%.*}
	mean_lat_usec=${mean_lat_usec%.*}
	min_lat_usec=${min_lat_usec%.*}
	max_lat_usec=${max_lat_usec%.*}

	echo "$iops $(bc <<< "$bw_MBs * 1024") $mean_lat_usec $min_lat_usec $max_lat_usec"
}
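
# A sketch of the "Total :" summary line this parses (values are made up; the
# field order follows the "read" statement above - IOPS, MB/s, mean, min and
# max latency in usec):
#   Total : 675340.00 2638.00 47.38 11.24 4993.11
# which the function would echo as "675340 2701312 47 11 4993", with the
# bandwidth converted to KiB/s.
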
function run_spdk_nvme_fio() {
	local plugin=$1
	echo "** Running fio test, this can take a while, depending on the run-time and ramp-time setting."
	if [[ "$plugin" == "spdk-plugin-nvme" ]]; then
		LD_PRELOAD=$plugin_dir/spdk_nvme $FIO_BIN $testdir/config.fio --output-format=json \
			"${@:2}" --ioengine=spdk
	elif [[ "$plugin" == "spdk-plugin-bdev" ]]; then
		LD_PRELOAD=$plugin_dir/spdk_bdev $FIO_BIN $testdir/config.fio --output-format=json \
			"${@:2}" --ioengine=spdk_bdev --spdk_json_conf=$testdir/bdev.conf --spdk_mem=4096
	fi

	sleep 1
}
function run_nvme_fio() {
	echo "** Running fio test, this can take a while, depending on the run-time and ramp-time setting."
	$FIO_BIN $testdir/config.fio --output-format=json "$@"
	sleep 1
}
function run_bdevperf() {
	echo "** Running bdevperf test, this can take a while, depending on the run-time setting."
	$bdevperf_dir/bdevperf --json $testdir/bdev.conf -q $IODEPTH -o $BLK_SIZE -w $RW -M $MIX \
		-t $RUNTIME -m "[$CPUS_ALLOWED]" -r /var/tmp/spdk.sock
	sleep 1
}
function run_nvmeperf() {
	# Prepare the -r argument string for the nvme perf command
	local r_opt
	local disks

	# Limit the number of disks to $1 if needed
	disks=($(get_disks nvme))
	disks=("${disks[@]:0:$1}")
	r_opt=$(printf -- ' -r "trtype:PCIe traddr:%s"' "${disks[@]}")

	echo "** Running nvme perf test, this can take a while, depending on the run-time setting."

	# Run the command in a separate shell as this solves quoting issues related to the r_opt var
	$SHELL -c "$nvmeperf_dir/perf $r_opt -q $IODEPTH -o $BLK_SIZE -w $RW -M $MIX -t $RUNTIME -c [$CPUS_ALLOWED]"
	sleep 1
}
function wait_for_nvme_reload() {
	local nvmes=$1

	shopt -s extglob
	for disk in $nvmes; do
		cmd="ls /sys/block/$disk/queue/*@(iostats|rq_affinity|nomerges|io_poll_delay)*"
		until $cmd 2> /dev/null; do
			echo "Waiting for full nvme driver reload..."
			sleep 0.5
		done
	done
	# Restore the default; "shopt -q" would only query the option, not unset it
	shopt -u extglob
}
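
# Example usage of wait_for_nvme_reload (a sketch; assumes the kernel nvme
# driver was just rebound, e.g. by scripts/setup.sh reset, so the sysfs queue
# attributes may not be populated yet):
#   wait_for_nvme_reload "nvme0n1 nvme1n1"
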
function verify_disk_number() {
	# Check if we have an appropriate number of disks to carry out the test
	disks=($(get_disks $PLUGIN))
	if [[ $DISKNO == "ALL" ]] || [[ $DISKNO == "all" ]]; then
		DISKNO=${#disks[@]}
	elif [[ $DISKNO -gt ${#disks[@]} ]] || [[ ! $DISKNO =~ ^[0-9]+$ ]]; then
		echo "error: Requested number of devices ($DISKNO) is not a valid number or exceeds the number of devices found (${#disks[@]})"
		false
	fi
}
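
# Example usage of verify_disk_number (a sketch; assumes the caller set the
# DISKNO and PLUGIN globals, as the run_perf.sh wrapper does):
#   DISKNO=ALL PLUGIN=spdk-plugin-bdev
#   verify_disk_number # rewrites DISKNO to the actual number of devices found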