test/qos: set qos limits to a % of the maximum disk performance

We used to set an arbitrary qos limit which in some
cases happened to be higher than the actual disk
capabilities. Even though we had an explicit check
for that and skipped the entire qos test suite when
the device was too slow, disk performance can vary:
a run may be just fast enough to pass that initial
check, only to slow down and fail in the middle of
the test suite. If a bdev maxes out at 21 MB/s on
one run, it may just as well do 19 MB/s on another.
That is exactly what has been causing intermittent
failures on our CI.

We fix this by removing the arbitrary qos limits
and instead setting them to a % of the maximum disk
performance measured at the start of the test. This
also lets us remove the code that skipped the entire
test suite when the disk was too slow - we definitely
don't want to skip any tests.

Change-Id: I6de8a183c00bab64484b4ddb12df1dedfbed23f8
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/451887
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: yidong0635 <dongx.yi@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>

@@ -26,10 +26,13 @@ function check_qos_works_well() {
 	end_io_count=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats")
 	end_bytes_read=$(jq -r '.bdevs[0].bytes_read' <<< "$iostats")
+	IOPS_RESULT=$(((end_io_count-start_io_count)/5))
+	BANDWIDTH_RESULT=$(((end_bytes_read-start_bytes_read)/5))
 	if [ $LIMIT_TYPE = IOPS ]; then
-		read_result=$(((end_io_count-start_io_count)/5))
+		read_result=$IOPS_RESULT
 	else
-		read_result=$(((end_bytes_read-start_bytes_read)/5))
+		read_result=$BANDWIDTH_RESULT
 	fi
 	if [ $enable_limit = true ]; then
@@ -42,16 +45,10 @@ function check_qos_works_well() {
 	else
 		retval=$(echo "$read_result > $qos_limit" | bc)
 		if [ $retval -eq 0 ]; then
-			if [ $check_qos = true ]; then
-				echo "$read_result less than $qos_limit - exit QoS testing"
-				ENABLE_QOS=false
-				exit 0
-			else
-				echo "$read_result less than $qos_limit - expected greater than"
-				exit 1
-			fi
+			echo "$read_result less than $qos_limit - expected greater than"
+			exit 1
 		fi
-	fi
 	fi
 }
 
 if [ -z "$TARGET_IP" ]; then
@@ -68,12 +65,8 @@ timing_enter qos
 MALLOC_BDEV_SIZE=64
 MALLOC_BLOCK_SIZE=512
-ENABLE_QOS=true
-IOPS_LIMIT=20000
-BANDWIDTH_LIMIT_MB=20
-BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))
-READ_BANDWIDTH_LIMIT_MB=10
-READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024))
+IOPS_RESULT=
+BANDWIDTH_RESULT=
 LIMIT_TYPE=IOPS
 
 rpc_py="$rootdir/scripts/rpc.py"
 fio_py="$rootdir/scripts/fio.py"
@@ -104,37 +97,50 @@ iscsiadm -m node --login -p $TARGET_IP:$ISCSI_PORT
 trap "iscsicleanup; killprocess $pid; iscsitestfini $1 $2; exit 1" SIGINT SIGTERM EXIT
 
-# Check whether to enable the QoS testing.
-check_qos_works_well false $IOPS_LIMIT Malloc0 true
-if [ $ENABLE_QOS = true ]; then
-	# Limit the I/O rate by RPC, then confirm the observed rate matches.
-	$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
-	check_qos_works_well true $IOPS_LIMIT Malloc0 false
+# Run FIO without any QOS limits to determine the raw performance
+check_qos_works_well false 0 Malloc0
+
+# Set IOPS/bandwidth limit to 50% of the actual unrestrained performance.
+IOPS_LIMIT=$(($IOPS_RESULT/2))
+BANDWIDTH_LIMIT=$(($BANDWIDTH_RESULT/2))
+# Set READ bandwidth limit to 50% of the RW bandwidth limit to be able
+# to differentiate those two.
+READ_BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT/2))
+# Also round them down to nearest multiple of either 1000 IOPS or 1MB BW
+# which are the minimal QoS granularities
+IOPS_LIMIT=$(($IOPS_LIMIT/1000*1000))
+BANDWIDTH_LIMIT_MB=$(($BANDWIDTH_LIMIT/1024/1024))
+BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))
+READ_BANDWIDTH_LIMIT_MB=$(($READ_BANDWIDTH_LIMIT/1024/1024))
+READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024))
+
+# Limit the I/O rate by RPC, then confirm the observed rate matches.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
+check_qos_works_well true $IOPS_LIMIT Malloc0
 
-	# Now disable the rate limiting, and confirm the observed rate is not limited anymore.
-	$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0
-	check_qos_works_well false $IOPS_LIMIT Malloc0 false
+# Now disable the rate limiting, and confirm the observed rate is not limited anymore.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0
+check_qos_works_well false $IOPS_LIMIT Malloc0
 
-	# Limit the I/O rate again.
-	$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
-	check_qos_works_well true $IOPS_LIMIT Malloc0 false
+# Limit the I/O rate again.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
+check_qos_works_well true $IOPS_LIMIT Malloc0
 
-	echo "I/O rate limiting tests successful"
+echo "I/O rate limiting tests successful"
 
-	# Limit the I/O bandwidth rate by RPC, then confirm the observed rate matches.
-	LIMIT_TYPE=BANDWIDTH
-	$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB
-	check_qos_works_well true $BANDWIDTH_LIMIT Malloc0 false
+# Limit the I/O bandwidth rate by RPC, then confirm the observed rate matches.
+LIMIT_TYPE=BANDWIDTH
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB
+check_qos_works_well true $BANDWIDTH_LIMIT Malloc0
 
-	# Now disable the bandwidth rate limiting, and confirm the observed rate is not limited anymore.
-	$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec 0
-	check_qos_works_well false $BANDWIDTH_LIMIT Malloc0 false
+# Now disable the bandwidth rate limiting, and confirm the observed rate is not limited anymore.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec 0
+check_qos_works_well false $BANDWIDTH_LIMIT Malloc0
 
-	# Limit the I/O bandwidth rate again with both read/write and read/only.
-	$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB --r_mbytes_per_sec $READ_BANDWIDTH_LIMIT_MB
-	check_qos_works_well true $READ_BANDWIDTH_LIMIT Malloc0 false
+# Limit the I/O bandwidth rate again with both read/write and read/only.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB --r_mbytes_per_sec $READ_BANDWIDTH_LIMIT_MB
+check_qos_works_well true $READ_BANDWIDTH_LIMIT Malloc0
 
-	echo "I/O bandwidth limiting tests successful"
-fi
+echo "I/O bandwidth limiting tests successful"
 
 iscsicleanup
 $rpc_py delete_target_node 'iqn.2016-06.io.spdk:Target1'
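
For reference, the limit derivation introduced above boils down to a few lines of integer arithmetic. The formulas are copied from the patch; the concrete numbers are purely an illustrative example (a bdev measuring about 43k read IOPS and ~21 MiB/s unrestrained), not results from any particular CI run:

# Example baseline, as measured by the unrestrained check_qos_works_well run:
IOPS_RESULT=43000          # read I/Os per second (illustrative value)
BANDWIDTH_RESULT=22020096  # bytes per second, ~21 MiB/s (illustrative value)

# 50% of the measured performance becomes the QoS limit.
IOPS_LIMIT=$(($IOPS_RESULT/2))               # 21500
BANDWIDTH_LIMIT=$(($BANDWIDTH_RESULT/2))     # 11010048
# Read-only limit is half of the read/write limit so the two can be told apart.
READ_BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT/2)) # 5505024

# Round down to the minimal QoS granularities: 1000 IOPS and 1 MiB.
IOPS_LIMIT=$(($IOPS_LIMIT/1000*1000))                        # 21000
BANDWIDTH_LIMIT_MB=$(($BANDWIDTH_LIMIT/1024/1024))           # 10
BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))           # 10485760
READ_BANDWIDTH_LIMIT_MB=$(($READ_BANDWIDTH_LIMIT/1024/1024)) # 5
READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024)) # 5242880

Rounding down to 1000 IOPS and 1 MiB (the minimal QoS granularities, per the patch comment) also keeps the byte value used for verification identical to the MB value passed to set_bdev_qos_limit over RPC.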