test/setup: Account for reserved and surplus hugepages

On some occasions the total number of available hugepages increases
on account of reserved and/or surplus hugepages allocated by the
kernel. This affects the test, which ends up with a bigger number of
hugepages than actually requested via setup.sh.

Hugepage overcommitting is disabled on CI nodes; however, this
issue spontaneously appears regardless. To rectify this, add the extra
hugepages when checking the expected number, and add some debug output
showing the state of meminfo in case the test still fails.

Signed-off-by: Michal Berger <michal.berger@intel.com>
Change-Id: If30a422240045ac188a9175eeab7210351eb34a6
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13826
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Michal Berger 2022-08-02 12:26:26 +02:00 committed by Tomasz Zawadzki
parent 1bfe8f98a1
commit 327f69ea41
2 changed files with 33 additions and 4 deletions

View File

@ -19,6 +19,8 @@ get_meminfo() {
mem_f=/proc/meminfo
if [[ -e /sys/devices/system/node/node$node/meminfo ]]; then
mem_f=/sys/devices/system/node/node$node/meminfo
elif [[ -n $node ]]; then
return 1
fi
mapfile -t mem < "$mem_f"
mem=("${mem[@]#Node +([0-9]) }")

View File

@ -87,14 +87,34 @@ verify_nr_hugepages() {
local node
local sorted_t
local sorted_s
local surp
local resv
local anon
if [[ $(< /sys/kernel/mm/transparent_hugepage/enabled) != *"[never]"* ]]; then
anon=$(get_meminfo AnonHugePages)
fi
surp=$(get_meminfo HugePages_Surp)
resv=$(get_meminfo HugePages_Rsvd)
echo "nr_hugepages=$nr_hugepages"
(($(< "$default_huge_nr") == nr_hugepages))
echo "resv_hugepages=$resv"
echo "surplus_hugepages=$surp"
echo "anon_hugepages=${anon:-disabled}"
(($(< "$default_huge_nr") == nr_hugepages + surp + resv))
# This knob doesn't account for the surp, resv hugepages
(($(< "$global_huge_nr") == nr_hugepages))
(($(get_meminfo HugePages_Total) == nr_hugepages))
(($(get_meminfo HugePages_Total) == nr_hugepages + surp + resv))
get_nodes
# Take global resv and per-node surplus hugepages into account
for node in "${!nodes_test[@]}"; do
((nodes_test[node] += resv))
((nodes_test[node] += $(get_meminfo HugePages_Surp "$node")))
done
# There's no obvious way of determining which NUMA node is going to end
# up with an odd number of hugepages in case such number was actually
# allocated by the kernel. Considering that, let's simply check if our
@ -103,7 +123,7 @@ verify_nr_hugepages() {
for node in "${!nodes_test[@]}"; do
sorted_t[nodes_test[node]]=1 sorted_s[nodes_sys[node]]=1
echo "node$node=${nodes_sys[node]}"
echo "node$node=${nodes_sys[node]} expecting ${nodes_test[node]}"
done
[[ ${!sorted_s[*]} == "${!sorted_t[*]}" ]]
}
@ -177,8 +197,15 @@ hp_status() {
while read -r node size free _ total; do
size=${size/kB/} node=${node#node}
((size == default_hugepages)) || continue
((free == nodes_test[node]))
((total == nodes_test[node]))
# If something grabbed hugepages we can't really do anything about it. Just skip the free check and leave
# a big warning.
if ((free != total)); then
printf '* %u free != %u total hugepages. Something is using hugepages, this may affect the test\n' \
"$free" "$total" >&2
continue
fi
((free == nodes_test[node]))
done < <(setup output status |& grep "node[0-9]")
}