During tests, the main portion of the app's time is spent in user mode when polling is in effect. Currently, the idleness of a given cpu is determined in comparison to all the possible states the cpu can be in. This makes it easier for third-party processes to impact the idleness of the cpu (as it may be lowered via different kinds of loads). For instance, if the target cpu suddenly sees a spike in system load (even up to 100%), it should not be relevant for the test where it's known that the app's polling utilized mainly user mode prior to switching the scheduler. With that in mind, if the general idle check fails, look up the raw samples matching the user mode and, if the load is relatively low (<= 15%), consider this as a pass. Signed-off-by: Michal Berger <michallinuxstuff@gmail.com> Change-Id: I4ab260d8bcf20a69f2f0be10f0fd7be577682be3 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12909 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
621 lines
16 KiB
Bash
621 lines
16 KiB
Bash
# nullglob: unmatched globs expand to nothing instead of the literal pattern.
# extglob: required to parse the +([0-9]) patterns used by the loops below.
shopt -s nullglob extglob

# Read-only sysfs roots used by the cpu/node/cpufreq helpers in this file.
declare -r sysfs_system=/sys/devices/system
declare -r sysfs_cpu=$sysfs_system/cpu
declare -r sysfs_node=$sysfs_system/node

# Path under $rootdir ($rootdir must be set by whoever sources this file).
declare -r scheduler=$rootdir/test/event/scheduler/scheduler
# Plugin name handed to rpc_cmd --plugin by the *_thread helpers below.
declare -r plugin=scheduler_plugin

# NOTE(review): presumably provides exec_in_cgroup used further down - confirm.
source "$rootdir/test/scheduler/cgroups.sh"
|
|
|
|
# Record every list element in the named indexed array, using the element
# as both the index and the value (i.e. a set keyed by number).
# Arguments: $1   - name of the target array, resolved in the caller's scope
#            $2.. - numeric elements to record
# Returns:   0, also when called without any arguments.
fold_list_onto_array() {
	# Without a target name there is nothing to fold.
	(($# > 0)) || return 0

	# A nameref is used instead of eval so the element text is never
	# re-parsed as shell code.
	local -n _fold_list_target=$1
	local elem

	shift
	for elem; do
		_fold_list_target[elem]=$elem
	done
}
|
|
|
|
# Print all arguments joined into a single comma-separated string.
fold_array_onto_string() {
	# "$*" joins the positional parameters with the first character of IFS.
	local IFS=,

	echo "$*"
}
|
|
|
|
# Expand a kernel-style cpu list (e.g. "0-2,4,6-9") read from a file into
# one cpu number per line, sorted ascending and de-duplicated.
# Arguments: $1 - path of the file holding the list (first line is used)
# Outputs:   cpu numbers on stdout; nothing when the list is empty
parse_cpu_list() {
	local list=$1
	local -a fields=() cpus=()
	local field first last cpu

	IFS="," read -ra fields < "$list"

	((${#fields[@]} > 0)) || return 0

	for field in "${fields[@]}"; do
		case "$field" in
			*-*)
				# Range: mark every cpu between both ends, inclusive.
				first=${field%-*} last=${field#*-}
				for ((cpu = first; cpu <= last; cpu++)); do
					cpus[cpu]=$cpu
				done
				;;
			*)
				cpus[field]=$field
				;;
		esac
	done

	# The indexes double as the sorted, unique result set.
	printf '%u\n' "${!cpus[@]}"
}
|
|
|
|
# Map the cpus of a single NUMA node into the global lookup tables.
# Arguments: $1 - node index
# Globals written: node_<N>_cpu, node_<N>_core_<C>, cpu_core_map,
#                  cpu_node_map, cpus, nodes
map_cpus_node() {
	local node_idx=$1
	local cpu_idx core_idx
	# Every cpu of the node lands in node_<N>_cpu.
	local -n _cpu_node_map=node_${node_idx}_cpu

	for cpu_idx in $(parse_cpu_list "$sysfs_node/node$node_idx/cpulist"); do
		if is_cpu_online "$cpu_idx"; then
			# The core id is looked up only for cpus reported as online.
			core_idx=$(< "$sysfs_cpu/cpu$cpu_idx/topology/core_id")
			# Re-point the nameref at this core's sibling list.
			local -n _cpu_core_map=node_${node_idx}_core_${core_idx}
			_cpu_core_map+=("$cpu_idx")
			cpu_core_map[cpu_idx]=$core_idx
		fi
		_cpu_node_map+=("$cpu_idx")
		cpu_node_map[cpu_idx]=$node_idx
		cpus+=("$cpu_idx")
	done

	nodes[node_idx]=$node_idx
}
|
|
|
|
map_cpus() {
|
|
local -g cpus=()
|
|
local -g nodes=()
|
|
local -g cpu_node_map=()
|
|
local -g cpu_core_map=()
|
|
local -g core_node_map=()
|
|
local node
|
|
|
|
unset -v "${!node_@}"
|
|
|
|
for node in "$sysfs_node/node"+([0-9]); do
|
|
map_cpus_node "${node##*node}"
|
|
done
|
|
}
|
|
|
|
# Print cpus, one per line, from the maps built by map_cpus().
# Arguments: $1 - node index (optional; all mapped cpus when empty)
#            $2 - core index within that node (optional)
# Returns:   1 when the selection is empty.
get_cpus() {
	local node=$1 core=$2
	local -a _cpus=()
	local source_ref

	if [[ -z $node ]]; then
		_cpus=("${cpus[@]}")
	else
		# Pick the dynamically named array, narrowing to a single core
		# when one was requested.
		source_ref="node_${node}_cpu[@]"
		if [[ -n $core ]]; then
			source_ref="node_${node}_core_${core}[@]"
		fi
		_cpus=(${!source_ref})
	fi

	((${#_cpus[@]} > 0)) || return 1
	printf '%u\n' "${_cpus[@]}"
}
|
|
|
|
# Print the cpus listed in $sysfs_cpu/isolated, one per line.
# Prints nothing (and succeeds) when that file does not exist.
get_isolated_cpus() {
	if [[ -e $sysfs_cpu/isolated ]]; then
		parse_cpu_list "$sysfs_cpu/isolated"
	fi
}
|
|
|
|
# Print the cpus listed in $sysfs_cpu/offline, one per line.
# Prints nothing (and succeeds) when that file does not exist.
get_offline_cpus() {
	if [[ -e $sysfs_cpu/offline ]]; then
		parse_cpu_list "$sysfs_cpu/offline"
	fi
}
|
|
|
|
# Print the cpus listed in $sysfs_cpu/online, one per line.
# Prints nothing (and succeeds) when that file does not exist.
get_online_cpus() {
	if [[ -e $sysfs_cpu/online ]]; then
		parse_cpu_list "$sysfs_cpu/online"
	fi
}
|
|
|
|
# Succeed when cpu $1 is part of the list printed by get_online_cpus().
is_cpu_online() {
	local -a online=()
	local -a listed=($(get_online_cpus))

	# Turn the list into a set indexed by cpu number, then test membership.
	fold_list_onto_array online "${listed[@]}"
	[[ -v online[$1] ]]
}
|
|
|
|
# Succeed when cpu $1 is NOT reported as online by is_cpu_online().
is_cpu_offline() {
	if is_cpu_online "$1"; then
		return 1
	fi
	return 0
}
|
|
|
|
# Bring cpu $1 online through its sysfs "online" knob.
# Returns: 0 when the cpu is already online or the write succeeded,
#          non-zero when the cpu is offline but has no knob.
online_cpu() {
	local knob=$sysfs_cpu/cpu$1/online

	if is_cpu_offline "$1"; then
		[[ -e $knob ]] && echo 1 > "$knob"
	fi
}
|
|
|
|
# Take cpu $1 offline through its sysfs "online" knob.
# Returns: 0 when the cpu is already offline or the write succeeded,
#          non-zero when the cpu is online but has no knob.
offline_cpu() {
	local knob=$sysfs_cpu/cpu$1/online

	if is_cpu_online "$1"; then
		[[ -e $knob ]] && echo 0 > "$knob"
	fi
}
|
|
|
|
# Print a hexadecimal cpu mask (0x...) with one bit set per cpu argument.
# The mask is assembled nibble by nibble, so cpu ids of 64 and above are
# represented correctly instead of wrapping inside a 64-bit integer.
# Arguments: cpu numbers
# Outputs:   the mask on stdout; "0x0" when no cpus are given
mask_cpus() {
	local cpu idx hex=""
	local -a nibbles=() used=()

	for cpu; do
		# Each array slot holds 4 bits of the final mask.
		((nibbles[cpu / 4] |= 1 << (cpu % 4)))
	done

	used=("${!nibbles[@]}")
	if ((${#used[@]} == 0)); then
		printf '0x0\n'
		return 0
	fi

	# Walk from the most significant nibble down; untouched slots read as 0.
	for ((idx = used[-1]; idx >= 0; idx--)); do
		printf -v hex '%s%x' "$hex" "$((nibbles[idx]))"
	done
	printf '0x%s\n' "$hex"
}
|
|
|
|
# Populate the global "denied" set with every offline cpu plus any cpus
# given as arguments.
denied_list() {
	local -g denied
	local -a offline=($(get_offline_cpus))

	fold_list_onto_array denied "${offline[@]}" "$@"
}
|
|
|
|
# Drop from the global "allowed" set every cpu that also has a non-empty
# entry in the global "denied" set.
filter_allowed_list() {
	local cpu

	for cpu in "${!allowed[@]}"; do
		[[ -z ${denied[cpu]} ]] || unset -v "allowed[cpu]"
	done
}
|
|
|
|
# Build the global "allowed" cpu set: isolated cpus first, then whole cores
# (all siblings reported by get_cpus) of the given node until at least
# "max" cpus are collected or the node runs out of cpus.
# Arguments: $1 - number of cpus wanted (default 4); when isolated cpus
#                 exist, their count is added on top on the first pass
#            $2 - node index to pick cores from (default 0)
# Globals:   allowed (written), cpu_core_map, node_<N>_cpu (read)
allowed_list() {
	local max=${1:-4}
	local node=${2:-0}
	# Dynamic scoping on purpose: a recursive call inherits the position
	# reached by its parent, the first call starts at -1.
	local cpu_count=${cpu_count:--1}

	local -g allowed

	fold_list_onto_array allowed $(get_isolated_cpus)

	# First pass only: isolated cpus do not count against the request.
	if ((cpu_count < 0 && ${#allowed[@]} > 0)); then
		((max += ${#allowed[@]}))
	fi

	local -n node_cpu_ref=node_${node}_cpu

	# cpu_count is advanced only while more cpus are still needed.
	while ((${#allowed[@]} < max && ++cpu_count < ${#node_cpu_ref[@]})); do
		fold_list_onto_array allowed $(get_cpus "$node" "${cpu_core_map[node_cpu_ref[cpu_count]]}")
	done

	filter_allowed_list

	# A single fold may add several siblings at once and overshoot max.
	# Testing for equality only would then recurse forever, since neither
	# the set nor cpu_count changes between calls; hence >=.
	if ((${#allowed[@]} >= max)); then
		return 0
	elif ((cpu_count == ${#node_cpu_ref[@]})); then
		return 0
	else
		# Filtering removed some cpus; keep going from where we stopped.
		allowed_list "$max" "$node"
	fi
}
|
|
|
|
# Print the cpus a process is allowed to run on, one per line, based on
# the Cpus_allowed_list field of /proc/<pid>/status.
# Arguments: $1 - pid (defaults to the current shell)
# Returns:   1 when /proc/<pid>/status does not exist, 0 otherwise.
get_proc_cpu_affinity() {
	xtrace_disable

	local pid=${1:-$$}
	local status val
	local rc=0

	# Every path falls through to xtrace_restore so that each
	# xtrace_disable is paired with a restore.
	if [[ -e /proc/$pid/status ]]; then
		while IFS=":"$'\t' read -r status val; do
			if [[ $status == Cpus_allowed_list ]]; then
				parse_cpu_list <(echo "$val")
				break
			fi
		done < "/proc/$pid/status"
	else
		rc=1
	fi

	xtrace_restore
	return "$rc"
}
|
|
|
|
map_cpufreq() {
|
|
# This info is used to cross-reference current cpufreq setup with
|
|
# what DPDK's governor actually puts in place.
|
|
|
|
local -g cpufreq_drivers=()
|
|
local -g cpufreq_governors=()
|
|
local -g cpufreq_base_freqs=()
|
|
local -g cpufreq_max_freqs=()
|
|
local -g cpufreq_min_freqs=()
|
|
local -g cpufreq_cur_freqs=()
|
|
local -g cpufreq_is_turbo=()
|
|
local -g cpufreq_available_freqs=()
|
|
local -g cpufreq_available_governors=()
|
|
local -g cpufreq_high_prio=()
|
|
local -g cpufreq_non_turbo_ratio=()
|
|
local -g cpufreq_setspeed=()
|
|
local -g cpuinfo_max_freqs=()
|
|
local -g cpuinfo_min_freqs=()
|
|
local -g turbo_enabled=0
|
|
local cpu cpu_idx
|
|
|
|
for cpu in "$sysfs_cpu/cpu"+([0-9]); do
|
|
cpu_idx=${cpu##*cpu}
|
|
[[ -e $cpu/cpufreq ]] || continue
|
|
cpufreq_drivers[cpu_idx]=$(< "$cpu/cpufreq/scaling_driver")
|
|
cpufreq_governors[cpu_idx]=$(< "$cpu/cpufreq/scaling_governor")
|
|
|
|
# In case HWP is on
|
|
if [[ -e $cpu/cpufreq/base_frequency ]]; then
|
|
cpufreq_base_freqs[cpu_idx]=$(< "$cpu/cpufreq/base_frequency")
|
|
fi
|
|
|
|
cpufreq_cur_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_cur_freq")
|
|
cpufreq_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_max_freq")
|
|
cpufreq_min_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_min_freq")
|
|
|
|
local -n available_governors=available_governors_cpu_${cpu_idx}
|
|
cpufreq_available_governors[cpu_idx]="available_governors_cpu_${cpu_idx}[@]"
|
|
available_governors=($(< "$cpu/cpufreq/scaling_available_governors"))
|
|
|
|
local -n available_freqs=available_freqs_cpu_${cpu_idx}
|
|
cpufreq_available_freqs[cpu_idx]="available_freqs_cpu_${cpu_idx}[@]"
|
|
|
|
case "${cpufreq_drivers[cpu_idx]}" in
|
|
acpi-cpufreq)
|
|
available_freqs=($(< "$cpu/cpufreq/scaling_available_frequencies"))
|
|
if ((available_freqs[0] - 1000 == available_freqs[1])); then
|
|
cpufreq_is_turbo[cpu_idx]=1
|
|
else
|
|
cpufreq_is_turbo[cpu_idx]=0
|
|
fi
|
|
cpufreq_setspeed[cpu_idx]=$(< "$cpu/cpufreq/scaling_setspeed")
|
|
;;
|
|
intel_pstate | intel_cpufreq) # active or passive
|
|
local non_turbo_ratio base_max_freq num_freq freq is_turbo=0
|
|
|
|
non_turbo_ratio=$("$testdir/rdmsr.pl" "$cpu_idx" 0xce)
|
|
cpuinfo_min_freqs[cpu_idx]=$(< "$cpu/cpufreq/cpuinfo_min_freq")
|
|
cpuinfo_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/cpuinfo_max_freq")
|
|
cpufreq_non_turbo_ratio[cpu_idx]=$(((non_turbo_ratio >> 8) & 0xff))
|
|
if ((cpufreq_base_freqs[cpu_idx] / 100000 > cpufreq_non_turbo_ratio[cpu_idx])); then
|
|
cpufreq_high_prio[cpu_idx]=1
|
|
base_max_freq=${cpufreq_base_freqs[cpu_idx]}
|
|
else
|
|
cpufreq_high_prio[cpu_idx]=0
|
|
base_max_freq=$((cpufreq_non_turbo_ratio[cpu_idx] * 100000))
|
|
fi
|
|
num_freqs=$(((base_max_freq - cpuinfo_min_freqs[cpu_idx]) / 100000 + 1))
|
|
if ((base_max_freq < cpuinfo_max_freqs[cpu_idx])); then
|
|
((num_freqs += 1))
|
|
cpufreq_is_turbo[cpu_idx]=1
|
|
else
|
|
cpufreq_is_turbo[cpu_idx]=0
|
|
fi
|
|
available_freqs=()
|
|
for ((freq = 0; freq < num_freqs; freq++)); do
|
|
if ((freq == 0 && cpufreq_is_turbo[cpu_idx] == 1)); then
|
|
available_freqs[freq]=$((base_max_freq + 1))
|
|
else
|
|
available_freqs[freq]=$((base_max_freq - (freq - cpufreq_is_turbo[cpu_idx]) * 100000))
|
|
fi
|
|
done
|
|
;;
|
|
cppc_cpufreq)
|
|
cpufreq_setspeed[cpu_idx]=$(< "$cpu/cpufreq/scaling_setspeed")
|
|
scaling_min_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_min_freq")
|
|
scaling_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_max_freq")
|
|
cpuinfo_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/cpuinfo_max_freq")
|
|
nominal_perf[cpu_idx]=$(< "$cpu/acpi_cppc/nominal_perf")
|
|
highest_perf[cpu_idx]=$(< "$cpu/acpi_cppc/highest_perf")
|
|
|
|
#the unit of highest_perf and nominal_perf differs on different arm platforms.
|
|
#For highest_perf, it maybe 300 or 3000000, both means 3.0GHz.
|
|
if ((highest_perf[cpu_idx] > nominal_perf[cpu_idx] && (\
|
|
highest_perf[cpu_idx] == cpuinfo_max_freqs[cpu_idx] || \
|
|
highest_perf[cpu_idx] * 10000 == cpuinfo_max_freqs[cpu_idx]))); then
|
|
cpufreq_is_turbo[cpu_idx]=1
|
|
else
|
|
cpufreq_is_turbo[cpu_idx]=0
|
|
fi
|
|
|
|
if ((nominal_perf[cpu_idx] < 10000)); then
|
|
nominal_perf[cpu_idx]=$((nominal_perf[cpu_idx] * 10000))
|
|
fi
|
|
|
|
num_freqs=$(((nominal_perf[cpu_idx] - scaling_min_freqs[cpu_idx]) / 100000 + 1 + \
|
|
cpufreq_is_turbo[cpu_idx]))
|
|
|
|
available_freqs=()
|
|
for ((freq = 0; freq < num_freqs; freq++)); do
|
|
if ((freq == 0 && cpufreq_is_turbo[cpu_idx] == 1)); then
|
|
available_freqs[freq]=$((scaling_max_freqs[cpu_idx]))
|
|
else
|
|
available_freqs[freq]=$((nominal_perf[cpu_idx] - (\
|
|
freq - cpufreq_is_turbo[cpu_idx]) * 100000))
|
|
fi
|
|
done
|
|
;;
|
|
esac
|
|
done
|
|
if [[ -e $sysfs_cpu/cpufreq/boost ]]; then
|
|
turbo_enabled=$(< "$sysfs_cpu/cpufreq/boost")
|
|
elif [[ -e $sysfs_cpu/intel_pstate/no_turbo ]]; then
|
|
turbo_enabled=$((!$(< "$sysfs_cpu/intel_pstate/no_turbo")))
|
|
fi
|
|
}
|
|
|
|
# Set the frequency of a single cpu through its cpufreq sysfs knobs.
# Arguments: $1 - cpu index
#            $2 - minimum frequency (required)
#            $3 - maximum frequency (optional, intel drivers only)
# Globals:   cpufreq_drivers, cpufreq_governors, cpufreq_max_freqs (read);
#            these are populated by map_cpufreq()
# Returns:   1 when the cpu has no mapped driver or no min frequency given.
set_cpufreq() {
	local cpu=$1
	local min_freq=$2
	local max_freq=$3
	local cpufreq=$sysfs_cpu/cpu$cpu/cpufreq

	# Map the cpufreq info first
	[[ -n ${cpufreq_drivers[cpu]} ]] || return 1
	[[ -n $min_freq ]] || return 1

	case "${cpufreq_drivers[cpu]}" in
		acpi-cpufreq)
			# The speed is set via scaling_setspeed, after making sure the
			# userspace governor is selected. The attribute is
			# "scaling_governor" (singular).
			if [[ ${cpufreq_governors[cpu]} != userspace ]]; then
				echo "userspace" > "$cpufreq/scaling_governor"
			fi
			echo "$min_freq" > "$cpufreq/scaling_setspeed"
			;;
		intel_pstate | intel_cpufreq)
			# Only lower/raise the floor while it stays below the ceiling
			# recorded by map_cpufreq().
			if ((min_freq <= cpufreq_max_freqs[cpu])); then
				echo "$min_freq" > "$cpufreq/scaling_min_freq"
			fi
			if [[ -n $max_freq ]] && ((max_freq >= min_freq)); then
				echo "$max_freq" > "$cpufreq/scaling_max_freq"
			fi
			;;
	esac
}
|
|
|
|
# Select a cpufreq governor for a single cpu, writing to sysfs only when
# the requested governor differs from the current one.
# Arguments: $1 - cpu index
#            $2 - governor name
set_cpufreq_governor() {
	local cpu=$1 governor=$2
	local knob=$sysfs_cpu/cpu$cpu/cpufreq/scaling_governor

	# Already in place, nothing to write.
	if [[ $(< "$knob") == "$governor" ]]; then
		return 0
	fi
	echo "$governor" > "$knob"
}
|
|
|
|
# (Re)start the given app inside the /cpuset/spdk cgroup with
# --wait-for-rpc, then switch it to the dynamic scheduler and let it
# finish its init.
# Arguments: the app command line
# Globals:   spdk_pid (read: previous instance to kill; written: new pid)
exec_under_dynamic_scheduler() {
	local _rpc_py=$rootdir/scripts/rpc.py

	# Get rid of a still-running previous instance first.
	[[ ! -e /proc/$spdk_pid/status ]] || killprocess "$spdk_pid"

	exec_in_cgroup "/cpuset/spdk" "$@" --wait-for-rpc &
	spdk_pid=$!

	# Give some time for the app to init itself
	waitforlisten "$spdk_pid"

	"$_rpc_py" framework_set_scheduler dynamic
	"$_rpc_py" framework_start_init
}
|
|
|
|
# Refresh the "busy" and "idle" per-thread arrays (and thread_map) via
# _get_thread_stats(), with xtrace silenced for the duration.
get_thread_stats() {
	# Names of the arrays _get_thread_stats() should fill in.
	local -a lists=(busy idle)

	xtrace_disable
	_get_thread_stats "${lists[@]}"
	xtrace_restore
}
|
|
|
|
# Query thread_get_stats over RPC and store each thread's busy/idle
# counters in the arrays named by the caller, indexed by thread id.
# The thread name goes into thread_map under the same index.
# Arguments: $1 - name of the array receiving .busy values
#            $2 - name of the array receiving .idle values
_get_thread_stats() {
	local list_busy=$1 list_idle=$2
	local thread threads stats
	local key target

	stats=$(rpc_cmd thread_get_stats | jq -r '.threads[]')
	threads=($(jq -r '.id' <<< "$stats"))

	for thread in "${threads[@]}"; do
		# busy first, then idle; each goes to the array named in the
		# matching list_<key> variable.
		for key in busy idle; do
			target=list_$key
			eval "${!target}[$thread]=\$(jq -r \"select(.id == $thread) | .$key\" <<< \$stats)"
		done
		thread_map[thread]=$(jq -r "select(.id == $thread) | .name" <<< "$stats")
	done
}
|
|
|
|
# Print counters of a single cpu taken from its "cpu<N>" line in /proc/stat.
# Arguments: $1 - cpu index
#            $2 - "idle" (4th counter only) or "all" (every counter, one
#                 per line); anything else prints nothing
get_cpu_stat() {
	local cpu_idx=$1
	local stat=$2 stats astats
	# Kept local so the caller's own "cpu" variable is not clobbered by
	# the read loop below.
	local cpu

	while read -r cpu stats; do
		[[ $cpu == "cpu$cpu_idx" ]] && astats=($stats)
	done < /proc/stat

	case "$stat" in
		idle) echo "${astats[3]}" ;;
		all) printf '%u\n' "${astats[@]}" ;;
		*) ;;
	esac
}
|
|
|
|
# Issue the plugin's scheduler_thread_create RPC; all arguments are
# passed through unchanged.
create_thread() {
	local -a request=(--plugin "$plugin" scheduler_thread_create)

	rpc_cmd "${request[@]}" "$@"
}
|
|
|
|
# Issue the plugin's scheduler_thread_delete RPC; all arguments are
# passed through unchanged.
destroy_thread() {
	local -a request=(--plugin "$plugin" scheduler_thread_delete)

	rpc_cmd "${request[@]}" "$@"
}
|
|
|
|
# Issue the plugin's scheduler_thread_set_active RPC; all arguments are
# passed through unchanged.
active_thread() {
	local -a request=(--plugin "$plugin" scheduler_thread_set_active)

	rpc_cmd "${request[@]}" "$@"
}
|
|
|
|
# Sample the counters of the given cpus once per second and work out what
# percentage of each interval was spent in the requested cpu time.
# Arguments: $1   - number of samples to collect (values <= 0 become 1)
#            $2   - cpu time name, a key of cpu_time_map below
#            $3.. - cpus to sample
# Globals written for the caller:
#   cpu_times[cpu]      - space separated per-sample percentages
#   avg_cpu_time[cpu]   - average of those percentages
#   cpu_time_map        - name -> column index
#   raw_samples_<cpu>[] - per column, the name ("<array>[@]") of the array
#                         holding the raw counters of every kept sample
get_cpu_time() {
	xtrace_disable

	local interval=$1 cpu_time=${2:-idle} interval_count
	shift 2
	local cpus=("$@") cpu
	local stats stat old_stats avg_load
	local total_sample
	# Per-sample deltas; local so they do not leak into the global scope.
	local -a sample_stats=()

	# Exposed for the caller
	local -g cpu_times=()
	local -g avg_cpu_time=()

	# cpu_time:
	# 0 - user (time spent in user mode)
	# 1 - nice (Time spent in user mode with low priority)
	# 2 - system (Time spent in system mode)
	# 3 - idle (Time spent in the idle task)
	# 4 - iowait (Time waiting for I/O to complete)
	# 5 - irq (Time servicing interrupts)
	# 6 - softirq (Time servicing softirqs)
	# 7 - steal (Stolen time)
	# 8 - guest (Time spent running a virtual CPU)
	# 9 - guest_nice (Time spent running a niced guest)

	local -gA cpu_time_map
	cpu_time_map["user"]=0
	cpu_time_map["nice"]=1
	cpu_time_map["system"]=2
	cpu_time_map["idle"]=3
	cpu_time_map["iowait"]=4
	cpu_time_map["irq"]=5
	cpu_time_map["softirq"]=6
	cpu_time_map["steal"]=7
	cpu_time_map["guest"]=8
	cpu_time_map["guest_nice"]=9

	# Clear up the env. The old_stats_ prefix matches the old_stats_<cpu>
	# arrays created below.
	unset -v ${!stat_@}
	unset -v ${!old_stats_@}
	unset -v ${!avg_stat@}
	unset -v ${!avg_load@}
	unset -v ${!raw_samples@}

	cpu_time=${cpu_time_map["$cpu_time"]}
	interval=$((interval <= 0 ? 1 : interval))
	# We skip first sample to have min 2 for stat comparison
	interval=$((interval + 1)) interval_count=0
	while ((interval_count++, --interval >= 0)); do
		for cpu in "${cpus[@]}"; do
			local -n old_stats=old_stats_$cpu
			local -n avg_load=avg_load_$cpu
			local -n raw_samples=raw_samples_$cpu

			sample_stats=() total_sample=0

			stats=($(get_cpu_stat "$cpu" all))
			if ((interval_count == 1)); then
				# Skip first sample
				old_stats=("${stats[@]}")
				continue
			fi
			# First pass: per-column delta since the previous sample and
			# the total of all deltas.
			for stat in "${!stats[@]}"; do
				avg_load[stat]="stat_${stat}_${cpu}[@]"
				sample_stats[stat]=$((stats[stat] - old_stats[stat]))
				: $((total_sample += sample_stats[stat]))
			done
			# Second pass: keep the raw counter and the delta's share (in %)
			# of the total for every column.
			for stat in "${!stats[@]}"; do
				local -n avg_stat=stat_${stat}_${cpu}
				local -n raw_samples_ref=raw_samples_${stat}_${cpu}
				raw_samples[stat]="raw_samples_${stat}_${cpu}[@]"
				raw_samples_ref+=("${stats[stat]}")
				avg_stat+=($((sample_stats[stat] * 100 / (total_sample == 0 ? 1 : total_sample))))
			done
			old_stats=("${stats[@]}")
		done
		sleep 1s
	done

	# We collected % for each time. Now determine the avg % for requested time.
	local load stat_load
	for cpu in "${cpus[@]}"; do
		load=0
		local -n avg_load_cpu=avg_load_$cpu
		stat_load=("${!avg_load_cpu[cpu_time]}")
		for stat in "${stat_load[@]}"; do
			: $((load += stat))
		done
		cpu_times[cpu]=${stat_load[*]}
		avg_cpu_time[cpu]=$((load / ${#stat_load[@]}))
	done

	xtrace_restore
}
|
|
|
|
# Sample the cpus listed in cpus_to_collect and decide for each of them
# whether it counts as idle.
# Arguments: $1 - number of seconds to sample for (default 5)
# Globals:   cpus_to_collect (read), is_idle[cpu] (written: 1 idle, 0 not)
# Returns:   1 when cpus_to_collect is empty.
collect_cpu_idle() {
	((${#cpus_to_collect[@]} > 0)) || return 1

	local time=${1:-5}
	local cpu samples user_load
	local -g is_idle=()

	printf 'Collecting cpu idle stats (cpus: %s) for %u seconds...\n' \
		"${cpus_to_collect[*]}" "$time"

	get_cpu_time "$time" idle "${cpus_to_collect[@]}"

	for cpu in "${cpus_to_collect[@]}"; do
		samples=(${cpu_times[cpu]})
		printf '* cpu%u idle samples: %s (avg: %u%%)\n' \
			"$cpu" "${samples[*]}" "${avg_cpu_time[cpu]}"

		# Cores with polling reactors have 0% idle time, while the ones
		# in interrupt mode won't have 100% idle. Polling reactors spend
		# most of their cpu time in user mode during the tests, so when
		# the plain idle check (last sample only) fails, the user mode
		# share of the load, taken from the raw samples in SC_CLK_TCK
		# context, gets the final say.
		user_load=$(cpu_usage_clk_tck "$cpu" user)

		if ((samples[-1] >= 70)); then
			printf '* cpu%u is idle\n' "$cpu"
			is_idle[cpu]=1
			continue
		fi
		if ((user_load > 15)); then
			printf '* cpu%u is not idle\n' "$cpu"
			is_idle[cpu]=0
			continue
		fi
		printf '* cpu%u not fully idle, but user load is low so passing\n' "$cpu"
		is_idle[cpu]=1
	done
}
|
|
|
|
# Print the cpu usage (0-100) of a given cpu over the last two raw samples
# collected by get_cpu_time(), expressed against CLK_TCK ticks.
# Arguments: $1 - cpu index
#            $2 - "user", "nice", "system" or "all" (default: all, i.e. the
#                 sum of the three); any other value yields a usage of 0
# Outputs:   the usage on stdout (no trailing newline), details on stderr
# Returns:   1 when no raw samples exist for the cpu.
cpu_usage_clk_tck() {
	local cpu=$1 time=${2:-all}
	local user nice system usage
	# Starts at 0 so an unsupported time name results in 0 usage instead
	# of an arithmetic syntax error further down.
	local clk_delta=0

	# We should be called in get_cpu_time()'s environment.
	[[ -v raw_samples_$cpu ]] || return 1

	local -n raw_samples=raw_samples_$cpu
	user=("${!raw_samples[cpu_time_map["user"]]}")
	nice=("${!raw_samples[cpu_time_map["nice"]]}")
	system=("${!raw_samples[cpu_time_map["system"]]}")

	# Construct delta based on last two samples of a given time. Plain
	# assignments are used so a zero delta does not yield a failure status.
	case "$time" in
		user | all) clk_delta=$((clk_delta + user[-1] - user[-2])) ;;&
		nice | all) clk_delta=$((clk_delta + nice[-1] - nice[-2])) ;;&
		system | all) clk_delta=$((clk_delta + system[-1] - system[-2])) ;;
		*) ;;
	esac
	# We assume 1s between each sample. See get_cpu_time().
	usage=$((100 * clk_delta / $(getconf CLK_TCK)))
	usage=$((usage > 100 ? 100 : usage))

	printf '%u' "$usage"
	printf '* cpu%u %s usage: %u\n' "$cpu" "$time" "$usage" >&2
	printf '* cpu%u user samples: %s\n' "$cpu" "${user[*]}" >&2
	printf '* cpu%u nice samples: %s\n' "$cpu" "${nice[*]}" >&2
	printf '* cpu%u system samples: %s\n' "$cpu" "${system[*]}" >&2
}
|
|
|
|
# Rebuild the global thread_cpus map (thread id -> cpu) from the reactor
# layout reported by framework_get_reactors, printing one line per thread.
# Globals: cpus, thread_map (read), thread_cpus (written)
# Returns: 1 when cpus is empty or no thread was found on any of them.
update_thread_cpus_map() {
	# "thread" is local as well so the caller's variable of the same name
	# is left untouched.
	local cpu thread
	local -g thread_cpus=()
	local reactor_framework

	((${#cpus[@]} > 0)) || return 1

	# Refreshes thread_map, used below for the thread names.
	get_thread_stats

	reactor_framework=$(rpc_cmd framework_get_reactors | jq -r '.reactors[]')
	for cpu in "${cpus[@]}"; do
		for thread in $(jq -r "select(.lcore == $cpu) | .lw_threads[].id" <<< "$reactor_framework"); do
			printf '* Thread %u (%s) on cpu%u\n' "$thread" "${thread_map[thread]}" "$cpu"
			thread_cpus[thread]=$cpu
		done
	done
	((${#thread_cpus[@]} > 0))
}
|