spdk/scripts/perf/nvmf/common.py
Commit da55cb8756 (Karol Latecki): "scripts/nvmf_perf: CPU and power utilization to csv output"

Find matching SAR and PM script output files, parse them, and put the
calculated average from across all workload iterations into the final
CSV with aggregated results.

# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2018 Intel Corporation.
# All rights reserved.
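"""Helpers for the SPDK nvmf perf scripts: parse fio JSON output files and
target-side measurement files (SAR CPU utilization, power meter averages)
and aggregate the results into a single CSV file."""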
import os
import re
import json
import logging
from subprocess import check_output
from collections import OrderedDict
from json.decoder import JSONDecodeError


def read_json_stats(file):
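    """Parse a single fio JSON result file and return a flat list of read and
    write IOPS, bandwidth and latency statistics (latencies converted to usec)."""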
with open(file, "r") as json_data:
data = json.load(json_data)
        job_pos = 0  # always 0 because aggregated results are used
# Check if latency is in nano or microseconds to choose correct dict key
def get_lat_unit(key_prefix, dict_section):
# key prefix - lat, clat or slat.
# dict section - portion of json containing latency bucket in question
# Return dict key to access the bucket and unit as string
for k, _ in dict_section.items():
if k.startswith(key_prefix):
return k, k.split("_")[1]
def get_clat_percentiles(clat_dict_leaf):
if "percentile" in clat_dict_leaf:
p99_lat = float(clat_dict_leaf["percentile"]["99.000000"])
p99_9_lat = float(clat_dict_leaf["percentile"]["99.900000"])
p99_99_lat = float(clat_dict_leaf["percentile"]["99.990000"])
p99_999_lat = float(clat_dict_leaf["percentile"]["99.999000"])
return [p99_lat, p99_9_lat, p99_99_lat, p99_999_lat]
else:
# Latest fio versions do not provide "percentile" results if no
# measurements were done, so just return zeroes
return [0, 0, 0, 0]
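        # Example of the fio JSON layout these helpers expect (abridged; the
        # latency buckets may be reported as "lat_ns"/"clat_ns" or "lat_us"/"clat_us"):
        #   "jobs": [{"read":  {"iops": ..., "bw": ...,
        #                       "lat_ns": {"mean": ..., "min": ..., "max": ...},
        #                       "clat_ns": {"percentile": {"99.000000": ..., ...}}},
        #             "write": {...}}]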
read_iops = float(data["jobs"][job_pos]["read"]["iops"])
read_bw = float(data["jobs"][job_pos]["read"]["bw"])
lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["read"])
read_avg_lat = float(data["jobs"][job_pos]["read"][lat_key]["mean"])
read_min_lat = float(data["jobs"][job_pos]["read"][lat_key]["min"])
read_max_lat = float(data["jobs"][job_pos]["read"][lat_key]["max"])
clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["read"])
read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat = get_clat_percentiles(
data["jobs"][job_pos]["read"][clat_key])
if "ns" in lat_unit:
read_avg_lat, read_min_lat, read_max_lat = [x / 1000 for x in [read_avg_lat, read_min_lat, read_max_lat]]
if "ns" in clat_unit:
read_p99_lat = read_p99_lat / 1000
read_p99_9_lat = read_p99_9_lat / 1000
read_p99_99_lat = read_p99_99_lat / 1000
read_p99_999_lat = read_p99_999_lat / 1000
write_iops = float(data["jobs"][job_pos]["write"]["iops"])
write_bw = float(data["jobs"][job_pos]["write"]["bw"])
lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["write"])
write_avg_lat = float(data["jobs"][job_pos]["write"][lat_key]["mean"])
write_min_lat = float(data["jobs"][job_pos]["write"][lat_key]["min"])
write_max_lat = float(data["jobs"][job_pos]["write"][lat_key]["max"])
clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["write"])
write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat = get_clat_percentiles(
data["jobs"][job_pos]["write"][clat_key])
if "ns" in lat_unit:
write_avg_lat, write_min_lat, write_max_lat = [x / 1000 for x in [write_avg_lat, write_min_lat, write_max_lat]]
if "ns" in clat_unit:
write_p99_lat = write_p99_lat / 1000
write_p99_9_lat = write_p99_9_lat / 1000
write_p99_99_lat = write_p99_99_lat / 1000
write_p99_999_lat = write_p99_999_lat / 1000
return [read_iops, read_bw, read_avg_lat, read_min_lat, read_max_lat,
read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat,
write_iops, write_bw, write_avg_lat, write_min_lat, write_max_lat,
                write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat]


def read_target_stats(measurement_name, results_file_list, results_dir):
    # Read additional metrics measured on the target side and calculate
    # the average across all workload iterations.
    # Currently this only works for SAR CPU utilization and power draw
    # measurements. Other tools (bwm-ng, pcm, dpdk memory) need to be
    # refactored to provide more structured result files instead of an
    # output dump.
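    # Each matched file is expected to contain a single float, e.g. "23.750"
    # (an average pre-computed for one workload iteration); the exact values
    # here are illustrative only.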
total_util = 0
for result_file in results_file_list:
with open(os.path.join(results_dir, result_file), "r") as result_file_fh:
total_util += float(result_file_fh.read())
avg_util = total_util / len(results_file_list)
    return {measurement_name: "{0:.3f}".format(avg_util)}


def parse_results(results_dir, csv_file):
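    """Aggregate fio results (and, if present, target-side SAR CPU utilization
    and power measurements) found in results_dir into csv_file."""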
files = os.listdir(results_dir)
fio_files = filter(lambda x: ".fio" in x, files)
json_files = [x for x in files if ".json" in x]
sar_files = [x for x in files if "sar" in x and "util" in x]
pm_files = [x for x in files if "pm" in x and "avg" in x]
headers = ["read_iops", "read_bw", "read_avg_lat_us", "read_min_lat_us", "read_max_lat_us",
"read_p99_lat_us", "read_p99.9_lat_us", "read_p99.99_lat_us", "read_p99.999_lat_us",
"write_iops", "write_bw", "write_avg_lat_us", "write_min_lat_us", "write_max_lat_us",
"write_p99_lat_us", "write_p99.9_lat_us", "write_p99.99_lat_us", "write_p99.999_lat_us"]
header_line = ",".join(["Name", *headers])
rows = set()
for fio_config in fio_files:
logging.info("Getting FIO stats for %s" % fio_config)
job_name, _ = os.path.splitext(fio_config)
aggr_headers = ["iops", "bw", "avg_lat_us", "min_lat_us", "max_lat_us",
"p99_lat_us", "p99.9_lat_us", "p99.99_lat_us", "p99.999_lat_us"]
# Look in the filename for rwmixread value. Function arguments do
# not have that information.
# TODO: Improve this function by directly using workload params instead
# of regexing through filenames.
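        # Illustrative (assumed) file naming: a job name containing "m_70"
        # yields rw_mixread = 0.70, while plain "read"/"write" jobs map to 1/0.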
if "read" in job_name:
rw_mixread = 1
elif "write" in job_name:
rw_mixread = 0
else:
rw_mixread = float(re.search(r"m_(\d+)", job_name).group(1)) / 100
# If "_CPU" exists in name - ignore it
# Initiators for the same job could have different num_cores parameter
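        # e.g. (hypothetical names) "randread_qd128_2CPU" and "randread_qd128_4CPU"
        # both collapse to "randread_qd128".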
job_name = re.sub(r"_\d+CPU", "", job_name)
job_result_files = [x for x in json_files if x.startswith(job_name)]
sar_result_files = [x for x in sar_files if x.startswith(job_name)]
pm_result_files = [x for x in pm_files if x.startswith(job_name)]
logging.info("Matching result files for current fio config %s:" % job_name)
for j in job_result_files:
logging.info("\t %s" % j)
        # More than one initiator may have been used in the test.
        # Result files are named so that the string after the last "_"
        # separator is the initiator (server) name.
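        # e.g. (hypothetical) "randread_qd128_run_1_initiator1.json" and
        # "randread_qd128_run_2_initiator1.json" both belong to initiator "initiator1".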
inits_names = set([os.path.splitext(x)[0].split("_")[-1] for x in job_result_files])
inits_avg_results = []
for i in inits_names:
logging.info("\tGetting stats for initiator %s" % i)
# There may have been more than 1 test run for this job, calculate average results for initiator
i_results = [x for x in job_result_files if i in x]
i_results_filename = re.sub(r"run_\d+_", "", i_results[0].replace("json", "csv"))
separate_stats = []
for r in i_results:
try:
stats = read_json_stats(os.path.join(results_dir, r))
separate_stats.append(stats)
logging.info(stats)
except JSONDecodeError:
logging.error("ERROR: Failed to parse %s results! Results might be incomplete!" % r)
init_results = [sum(x) for x in zip(*separate_stats)]
init_results = [x / len(separate_stats) for x in init_results]
inits_avg_results.append(init_results)
logging.info("\tAverage results for initiator %s" % i)
logging.info(init_results)
with open(os.path.join(results_dir, i_results_filename), "w") as fh:
fh.write(header_line + "\n")
fh.write(",".join([job_name, *["{0:.3f}".format(x) for x in init_results]]) + "\n")
# Sum results of all initiators running this FIO job.
        # Latency results are an average of latencies across all initiators.
inits_avg_results = [sum(x) for x in zip(*inits_avg_results)]
inits_avg_results = OrderedDict(zip(headers, inits_avg_results))
for key in inits_avg_results:
if "lat" in key:
inits_avg_results[key] /= len(inits_names)
# Aggregate separate read/write values into common labels
# Take rw_mixread into consideration for mixed read/write workloads.
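        # Example: with rw_mixread = 0.7 the aggregated avg_lat_us is
        # 0.7 * read_avg_lat_us + 0.3 * write_avg_lat_us, while iops and bw
        # are the sum of the read and write values.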
aggregate_results = OrderedDict()
for h in aggr_headers:
read_stat, write_stat = [float(value) for key, value in inits_avg_results.items() if h in key]
if "lat" in h:
_ = rw_mixread * read_stat + (1 - rw_mixread) * write_stat
else:
_ = read_stat + write_stat
aggregate_results[h] = "{0:.3f}".format(_)
if sar_result_files:
aggr_headers.append("target_avg_cpu_util")
aggregate_results.update(read_target_stats("target_avg_cpu_util", sar_result_files, results_dir))
if pm_result_files:
aggr_headers.append("target_avg_power")
aggregate_results.update(read_target_stats("target_avg_power", pm_result_files, results_dir))
rows.add(",".join([job_name, *aggregate_results.values()]))
# Create empty results file with just the header line
aggr_header_line = ",".join(["Name", *aggr_headers])
with open(os.path.join(results_dir, csv_file), "w") as fh:
fh.write(aggr_header_line + "\n")
    # Save results to file
    with open(os.path.join(results_dir, csv_file), "a") as fh:
        for row in rows:
            fh.write(row + "\n")
logging.info("You can find the test results in the file %s" % os.path.join(results_dir, csv_file))