diff --git a/scripts/perf/pm/collect-bmc-pm b/scripts/perf/pm/collect-bmc-pm new file mode 100755 index 000000000..3a71d0af0 --- /dev/null +++ b/scripts/perf/pm/collect-bmc-pm @@ -0,0 +1,429 @@ +#!/usr/bin/env bash +set -e + +hex() { printf '0x%02x\n' "$@"; } + +is_root() { + # Talking to local BMC device requires root privileges + if ((UID)); then + printf '%s, you need to be root to run this script\n' "$USER" >&2 + return 1 + fi + +} + +is_ipmitool() { + if ! type -P ipmitool; then + printf 'ipmitool not detected, cannot run commands against the BMC\n' >&2 + return 1 + fi +} + +ipmi_load() { + # Silently attempt to load core ipmi drivers - we will pick up the device later on. + modprobe -qa ipmi_si ipmi_devintf ipmi_msghandler || return 0 +} + +ipmi_supported() { + # Verify if kernel detected and registered at least one BMC under + # the ipmi platform. Look for KCS specifically as this the type + # of the interface the script was tested against. + + local ipmi=/sys/class/ipmi/ipmi0 + + # Keep these details global for easy access if needed. + local -g man_id prod_id dev_id ipmi_ver platform board ipmitool + + ipmi_load + + if [[ ! -e $ipmi ]]; then + printf 'BMC not detected. Please, make sure your platform is IPMI-compatible\n' + return 1 + fi >&2 + + type=$(< "$ipmi/device/type") + + if [[ $type != kcs ]]; then + printf 'No supported BMC interface detected (%s) - only KCS is supported\n' "$type" + return 1 + fi >&2 + + man_id=$(< "$ipmi/device/bmc/manufacturer_id") + prod_id=$(< "$ipmi/device/bmc/product_id") + dev_id=$(hex "$(< "$ipmi/device/bmc/device_id")") + ipmi_ver=$(< "$ipmi/device/bmc/ipmi_version") + + if [[ -e /sys/class/dmi/id/board_vendor ]]; then + platform=$(< /sys/class/dmi/id/board_vendor) + fi + + if [[ -e /sys/class/dmi/id/board_name ]]; then + board=$(< /sys/class/dmi/id/board_name) + fi + + # Keep output similar to ipmi_si's + cat <<- BMC_DEV >&2 + + BMC detected, details below: + Manufacturer ID: $man_id + Product ID: $prod_id + Device ID: $dev_id + IPMI Version: $ipmi_ver + Platform: ${platform:-unknown} + Board: ${board:-unknown} + + BMC_DEV + + # Verify if we have proper tools to work with + ipmitool=$(is_ipmitool) +} + +ipmiraw() { + # For the majority of commands we use raw payload to not depend on specific ipmitool version + # and the way how it interprets/parses the returned data. This also allows us to inspect the + # integrity of data more closely to make sure we don't report nonsensical values to the user. + + local rsp + + rsp=($("$ipmitool" raw "$@" 2> /dev/null)) + # Slap hex prefix to work with proper base + rsp=("${rsp[@]/#/0x}") + + hex "${rsp[@]}" +} + +dcmiraw() { + local cmd=$1 data=("${@:2}") + + ipmiraw 0x2c "$cmd" 0xdc "${data[@]}" +} + +print_dcmi_available_time_periods() { + local time_periods=${enhanced_power_attr[4]} + local -g available_time_periods=() + local -g available_time_periods_in_seconds=() + + available_time_periods[0]="NOW" + + if ((time_periods > 0)); then + local time_idx=5 + local offset=$time_idx + local units unit time time_s units_mask=0xc0 to_sec + + units[0x0]=seconds + units[0x1]=minutes + units[0x2]=hours + units[0x3]=days + + to_sec[0x0]=1 + to_sec[0x1]=60 + to_sec[0x2]=3600 + to_sec[0x3]=86400 + + while ((offset < time_idx + time_periods)); do + time=$((enhanced_power_attr[offset] & ~units_mask)) + unit=${units[enhanced_power_attr[offset] >> 6]:-unknown} + time_s=$((time * to_sec[enhanced_power_attr[offset] >> 6])) + if ((time != 0)); then + available_time_periods[offset]="$time $unit" + available_time_periods_in_seconds[time_s]=${enhanced_power_attr[offset]} + fi + ((++offset)) + done + fi + cat <<- TIME_PERIODS >&2 + + Available averaging time periods to request: + $(printf ' - %s\n' "${available_time_periods[@]}") + + TIME_PERIODS +} + +dcmi_power_support() { + # Verify if the BMC conforms to the DCMI spec + local rsp + + # Table 6-2, Get DCMI Capabilities Command Format + if ! rsp=($(dcmiraw 0x1 0x1)); then + printf 'Cannot determine if BMC supports DCMI Power Management capability\n' >&2 + return 1 + fi + + # Table 6-3, DCMI Capabilities Parameters: + # - Supported DCMI Capabilities: + # - Byte 2 Platform capabilities: [0] Power management + if ((!(rsp[5] & (1 << 0)))); then + printf 'BMC does not provide DCMI Power Mangament capability\n' >&2 + return 1 + fi + + # Check if BMC provides Enhanced System Power Statistics attributes - this allows to issue + # requests for power readings at averaging time period, .e.g. from last 5 seconds, 30 minutes, + # 1 hour and so on. With this we can provide more detailed view on power usage within a + # specific period of time. Without it, we need to depend only on current reading that should + # be always available (the "NOW" reading). + + local -g enhanced_power_attr=() + + # Table 6-3, DCMI Capabilities Parameters: + # - Enhanced System Power Statistics attributes + if enhanced_power_attr=($(dcmiraw 0x1 0x5)); then + print_dcmi_available_time_periods + fi + + printf 'Using DCMI Power Management\n' >&2 +} + +sdr_power_support() { + # This is a fallback which only some platforms may provide (confirmed PowerEdge and CYP). + # We are looking for a full, threshold sensor which reports overall power usage in Watts. + # Different BMCs may have SDRs which describe such sensor(s) differently so this is not + # 100% reliable. To make sure we pick up a proper sensor we also narrow it down to a + # specific entity (System Board or Power Supply). Readings from the sensor should be + # considered as "NOW" readings (without access to min, max readings). + + local -g power_sensors=() + local sensor entity unit status + + # Cache SDR to speed up sensor readings + if [[ ! -f $sdr_cache ]]; then + printf 'Saving SDR cache at %s\n' "$sdr_cache" >&2 + "$ipmitool" sdr dump "$sdr_cache" > /dev/null + fi + + if ((${#extra_power_sensors[@]} > 0)); then + power_sensors+=("${extra_power_sensors[@]}") + fi + + while IFS="," read -r sensor _ unit status _ entity _; do + [[ $unit == Watts && $status == ok ]] || continue + [[ $entity == "System Board" || $entity == "Power Supply" ]] || continue + power_sensors+=("$sensor") + done < <("$ipmitool" -S "$sdr_cache" -vc sdr list full 2>&1) + + if ((${#power_sensors[@]} > 0)); then + printf 'Using SDR (Power sensors: %s)\n' "${power_sensors[*]}" + else + printf 'Cannot locate power sensors\n' + return 1 + fi >&2 +} + +power_support() { + local -g support + + if [[ $interface == dcmi || $interface == sdr ]]; then + # override + "${interface}_power_support" + support=$interface + elif dcmi_power_support; then + support=dcmi + elif sdr_power_support; then + support=sdr + else + printf 'BMC does not provide Power Management support, cannot gather power measurements\n' >&2 + return 1 + fi +} + +get_dcmi_now_reading() { + local rsp reading=0 max min avg ts timeframe mode=01h + local get_cmd get_avg=0 print + + # Table 6-16, Get Power Reading Command: + get_cmd=(0x2 0x1 0x0 0x0) + + if [[ -n ${available_time_periods_in_seconds[interval]} ]]; then + get_cmd=(0x2 0x2 "${available_time_periods_in_seconds[interval]}" 0x0) + get_avg=1 + mode=02h + fi + + # We use System Power Statistics mode to get the "NOW" reading by default. In case + # interval matches one supported by Enhanced System Power Statistics we use that + # mode to obtain extra min, max, avg statistics. + + if ! rsp=($(dcmiraw "${get_cmd[@]}")); then + printf 'DCMI reading: error\n' + else + # Note that the BMC timestamp depends on the hwclock setup which we then attempt + # to represent in UTC. + ts=$((rsp[12] << 24 | rsp[11] << 16 | rsp[10] << 8 | rsp[9])) + # This is interpreted differently by different BMCs so for now we make a note of + # it but don't present it to the user. + timeframe=$((rsp[16] << 24 | rsp[15] << 16 | rsp[14] << 8 | rsp[13])) + reading=$((rsp[2] << 8 | rsp[1])) + if ((get_avg == 1)); then + min=$((rsp[4] << 8 | rsp[3])) + max=$((rsp[6] << 8 | rsp[5])) + avg=$((rsp[8] << 8 | rsp[7])) + _DCMI_min+=("$min") + _DCMI_max+=("$max") + _DCMI_avg+=("$avg") + power_readings["DCMI_MIN"]="_DCMI_min[@]" + power_readings["DCMI_MAX"]="_DCMI_max[@]" + power_readings["DCMI_AVG"]="_DCMI_avg[@]" + fi + _DCMI+=("$reading") + power_readings["DCMI"]="_DCMI[@]" + + for print in min max avg reading; do + [[ -n ${!print} ]] || continue + printf '(%s) DCMI %s (mode: %s): %u Watts (interval: %us)\n' \ + "$(utc "$ts")" \ + "$print" \ + "$mode" \ + "${!print}" \ + "$interval" + done + fi >&2 +} + +get_sdr_now_reading() { + local sensor reading=0 ts unit + + if ((${#power_sensors[@]} == 0)); then + printf 'No power sensors were provided\n' >&2 + return 1 + fi + + for sensor in "${!power_sensors[@]}"; do + ts=$(utc) + if ! IFS="," read -r _ reading unit _; then + reading=error + else + eval "_sensor${sensor}_readings+=($reading)" + power_readings["${power_sensors[sensor]}"]="_sensor${sensor}_readings[@]" + reading+=" $unit" + fi < <("$ipmitool" -c -S "$sdr_cache" sdr get "${power_sensors[sensor]}") 2> /dev/null + printf '(%s) Sensor %s reading: %s (interval %us)\n' \ + "$ts" \ + "${power_sensors[sensor]}" \ + "$reading" \ + "$interval" >&2 + done +} + +get_now_reading() { + case "$support" in + dcmi) get_dcmi_now_reading ;; + sdr) get_sdr_now_reading ;; + *) ;; + esac +} + +dump_readings() { + local sensor reading readings avg total + + ((${#power_readings[@]} > 0)) || return 1 + printf 'Dumping average sensors reading from %s\n' "${!power_readings[*]}" >&2 + + for sensor in "${!power_readings[@]}"; do + readings=("${!power_readings["$sensor"]}") + if ((${#readings[@]} == 0)); then + printf 'No readings available for %s sensor\n' "$sensor" >&2 + continue + fi + total=0 + for reading in "${readings[@]}"; do + ((total += ${reading%.*})) + done + avg=$((total / ${#readings[@]})) + printf '%u\n' "$avg" > "$output_dir/${prefix:+${prefix}_}avg_${sensor}.bmc.pm.txt" + printf '%u\n' "${readings[@]}" > "$output_dir/${prefix:+${prefix}_}all_${sensor}.bmc.pm.txt" + printf 'Dumped avg to %s\n' "$output_dir/${prefix:+${prefix}_}avg_${sensor}.bmc.pm.txt" >&2 + printf 'Dumped all to %s\n' "$output_dir/${prefix:+${prefix}_}all_${sensor}.bmc.pm.txt" >&2 + done +} + +utc() { + date --utc ${1:+-"d@$1"} +} + +cleanup() { + [[ -f $sdr_cache && $remove_sdr_cache == yes ]] && rm "$sdr_cache" + dump_readings +} + +collect_readings() { + local _count=$count + while ((count <= 0 ? 1 : _count--)); do + get_now_reading + sleep "${interval}s" + done +} + +help() { + cat <<- HELP + + Usage: $0 [-h] [-d dir] [-i sdr|dcmi] [-s SENSOR_NAME] [-t interval] [-l log_file] [-p prefix] [-c count] + + -h - Print this message. + -d - Directory where the results should be saved. Default is /tmp. + -i - Type of interface to use for requesting power usage. "sdr" or "dcmi". + If not set, available interface is used ("dcmi" has priority). + -t - How long to wait before each get power command in seconds. In case + this value matches one of supported averaging time periods special + variant of the command will be used to obtain the reading - this + variant is used only with the "dcmi" interface. Default is 1s. + -s - In case "sdr" interface is in use, try to read data from SENSOR_NAME. + -x - In case "sdr" interface is in use, don't remove SDR cache. This can + speed up subsequent runs of the script. + -l - Save output of the script to a log file (dir/${0##*/}.bmc.pm.log). + -p - Add prefix to saved files. + -c - Read power usage count times. 0 is the default and it means to run + indefinitely. + + When started, ${0##*/} will enter loop to continuously read power usage from either + DCMI interface or dedicated Watts sensors every interval. Each reading will be + logged to stderr. Upon termination, average power usage will be dumped to /tmp or + directory set by -d. + + HELP +} + +is_root + +output_dir=/tmp +interval=1 +remove_sdr_cache=yes +log_to_file=no +prefix="" +count=0 + +declare -A power_readings=() +declare -a extra_power_sensors=() + +while getopts :hi:s:d:t:xlp:c: arg; do + case "$arg" in + h) + help + exit 0 + ;; + d) output_dir=$OPTARG ;; + s) extra_power_sensors+=("$OPTARG") ;; + i) interface=${OPTARG,,} ;; + t) interval=$OPTARG ;; + x) remove_sdr_cache=no ;; + l) log_to_file=yes ;; + p) prefix=$OPTARG ;; + c) count=$OPTARG ;; + *) ;; + esac +done + +declare -r sdr_cache=$output_dir/sdr.cache +declare -r log_file=${prefix:+${prefix}_}${0##*/}.bmc.pm.log + +mkdir -p "$output_dir" +if [[ $log_to_file == yes ]]; then + printf 'Redirecting to %s\n' "$output_dir/$log_file" >&2 + exec > "$output_dir/$log_file" 2>&1 +fi + +trap 'cleanup' EXIT + +ipmi_supported +power_support + +collect_readings