2023-05-25 11:38:36 +00:00
|
|
|
use crate::app::Data;
|
|
|
|
use tabled::settings::Merge;
|
|
|
|
use tabled::{builder::Builder, settings::Style, Table};
|
|
|
|
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
|
|
pub(crate) fn parameters_table(
|
|
|
|
tokenizer_name: String,
|
|
|
|
sequence_length: u32,
|
|
|
|
decode_length: u32,
|
2023-08-28 09:43:47 +00:00
|
|
|
top_n_tokens: Option<u32>,
|
2023-05-25 11:38:36 +00:00
|
|
|
n_runs: usize,
|
|
|
|
warmups: usize,
|
|
|
|
temperature: Option<f32>,
|
|
|
|
top_k: Option<u32>,
|
|
|
|
top_p: Option<f32>,
|
|
|
|
typical_p: Option<f32>,
|
|
|
|
repetition_penalty: Option<f32>,
|
2024-02-08 17:41:25 +00:00
|
|
|
frequency_penalty: Option<f32>,
|
2023-05-25 11:38:36 +00:00
|
|
|
watermark: bool,
|
|
|
|
do_sample: bool,
|
|
|
|
) -> Table {
|
|
|
|
let mut builder = Builder::default();
|
|
|
|
|
|
|
|
builder.set_header(["Parameter", "Value"]);
|
|
|
|
|
|
|
|
builder.push_record(["Model", &tokenizer_name]);
|
|
|
|
builder.push_record(["Sequence Length", &sequence_length.to_string()]);
|
|
|
|
builder.push_record(["Decode Length", &decode_length.to_string()]);
|
2023-08-28 09:43:47 +00:00
|
|
|
builder.push_record(["Top N Tokens", &format!("{top_n_tokens:?}")]);
|
2023-05-25 11:38:36 +00:00
|
|
|
builder.push_record(["N Runs", &n_runs.to_string()]);
|
|
|
|
builder.push_record(["Warmups", &warmups.to_string()]);
|
|
|
|
builder.push_record(["Temperature", &format!("{temperature:?}")]);
|
|
|
|
builder.push_record(["Top K", &format!("{top_k:?}")]);
|
|
|
|
builder.push_record(["Top P", &format!("{top_p:?}")]);
|
|
|
|
builder.push_record(["Typical P", &format!("{typical_p:?}")]);
|
|
|
|
builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]);
|
2024-02-08 17:41:25 +00:00
|
|
|
builder.push_record(["Frequency Penalty", &format!("{frequency_penalty:?}")]);
|
2023-05-25 11:38:36 +00:00
|
|
|
builder.push_record(["Watermark", &watermark.to_string()]);
|
|
|
|
builder.push_record(["Do Sample", &do_sample.to_string()]);
|
|
|
|
|
|
|
|
let mut table = builder.build();
|
|
|
|
table.with(Style::markdown());
|
|
|
|
table
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn latency_table(data: &Data) -> Table {
|
|
|
|
let mut builder = Builder::default();
|
|
|
|
|
|
|
|
builder.set_header([
|
|
|
|
"Step",
|
|
|
|
"Batch Size",
|
|
|
|
"Average",
|
|
|
|
"Lowest",
|
|
|
|
"Highest",
|
|
|
|
"p50",
|
|
|
|
"p90",
|
|
|
|
"p99",
|
|
|
|
]);
|
|
|
|
|
|
|
|
add_latencies(
|
|
|
|
&mut builder,
|
|
|
|
"Prefill",
|
|
|
|
&data.batch_size,
|
|
|
|
&data.prefill_latencies,
|
|
|
|
);
|
|
|
|
add_latencies(
|
|
|
|
&mut builder,
|
|
|
|
"Decode (token)",
|
|
|
|
&data.batch_size,
|
|
|
|
&data.decode_token_latencies,
|
|
|
|
);
|
|
|
|
add_latencies(
|
|
|
|
&mut builder,
|
|
|
|
"Decode (total)",
|
|
|
|
&data.batch_size,
|
|
|
|
&data.decode_latencies,
|
|
|
|
);
|
|
|
|
|
|
|
|
let mut table = builder.build();
|
|
|
|
table.with(Style::markdown()).with(Merge::vertical());
|
|
|
|
table
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn throughput_table(data: &Data) -> Table {
|
|
|
|
let mut builder = Builder::default();
|
|
|
|
|
|
|
|
builder.set_header(["Step", "Batch Size", "Average", "Lowest", "Highest"]);
|
|
|
|
|
|
|
|
add_throuhgputs(
|
|
|
|
&mut builder,
|
|
|
|
"Prefill",
|
|
|
|
&data.batch_size,
|
|
|
|
&data.prefill_throughputs,
|
|
|
|
);
|
|
|
|
add_throuhgputs(
|
|
|
|
&mut builder,
|
|
|
|
"Decode",
|
|
|
|
&data.batch_size,
|
|
|
|
&data.decode_throughputs,
|
|
|
|
);
|
|
|
|
|
|
|
|
let mut table = builder.build();
|
|
|
|
table.with(Style::markdown()).with(Merge::vertical());
|
|
|
|
table
|
|
|
|
}
|
|
|
|
|
|
|
|
fn add_latencies(
|
|
|
|
builder: &mut Builder,
|
|
|
|
step: &'static str,
|
|
|
|
batch_size: &[u32],
|
|
|
|
batch_latencies: &[Vec<f64>],
|
|
|
|
) {
|
|
|
|
for (i, b) in batch_size.iter().enumerate() {
|
|
|
|
let latencies = &batch_latencies[i];
|
|
|
|
let (avg, min, max) = avg_min_max(latencies);
|
|
|
|
|
|
|
|
let row = [
|
|
|
|
step,
|
|
|
|
&b.to_string(),
|
|
|
|
&format_value(avg, "ms"),
|
|
|
|
&format_value(min, "ms"),
|
|
|
|
&format_value(max, "ms"),
|
|
|
|
&format_value(px(latencies, 50), "ms"),
|
|
|
|
&format_value(px(latencies, 90), "ms"),
|
|
|
|
&format_value(px(latencies, 99), "ms"),
|
|
|
|
];
|
|
|
|
|
|
|
|
builder.push_record(row);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn add_throuhgputs(
|
|
|
|
builder: &mut Builder,
|
|
|
|
step: &'static str,
|
|
|
|
batch_size: &[u32],
|
|
|
|
batch_throughputs: &[Vec<f64>],
|
|
|
|
) {
|
|
|
|
for (i, b) in batch_size.iter().enumerate() {
|
|
|
|
let throughputs = &batch_throughputs[i];
|
|
|
|
let (avg, min, max) = avg_min_max(throughputs);
|
|
|
|
|
|
|
|
let row = [
|
|
|
|
step,
|
|
|
|
&b.to_string(),
|
|
|
|
&format_value(avg, "tokens/secs"),
|
|
|
|
&format_value(min, "tokens/secs"),
|
|
|
|
&format_value(max, "tokens/secs"),
|
|
|
|
];
|
|
|
|
|
|
|
|
builder.push_record(row);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-29 18:17:24 +00:00
|
|
|
/// Returns `(average, minimum, maximum)` of `data`.
///
/// The extremes are selected with `f64::total_cmp`, so the ordering is total
/// even when the slice contains NaN values. For an empty slice all three
/// components are NaN (the average is `0.0 / 0.0`, the extremes fall back to
/// `f64::NAN`).
fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
    let count = data.len() as f64;
    let total: f64 = data.iter().sum();

    // Keep the earliest smallest element.
    let lowest = data
        .iter()
        .copied()
        .reduce(|best, x| if x.total_cmp(&best).is_lt() { x } else { best })
        .unwrap_or(f64::NAN);

    // Keep the latest largest element.
    let highest = data
        .iter()
        .copied()
        .reduce(|best, x| if x.total_cmp(&best).is_ge() { x } else { best })
        .unwrap_or(f64::NAN);

    (total / count, lowest, highest)
}
|
|
|
|
|
2024-03-29 18:17:24 +00:00
|
|
|
/// Returns the `p`-th percentile of `data`.
///
/// The samples are copied and sorted in ascending order (using
/// `f64::total_cmp`) before the lookup, so callers may pass them in any
/// order. The selected element is the one at index
/// `floor(p / 100 * data.len())` of the sorted copy, clamped to the last
/// element so that `p >= 100` yields the maximum.
///
/// Returns `f64::NAN` when `data` is empty.
fn px(data: &[f64], p: u32) -> f64 {
    if data.is_empty() {
        return f64::NAN;
    }

    // A percentile is only meaningful on ordered samples; sort a private copy
    // so the caller's slice is left untouched.
    let mut sorted = data.to_vec();
    sorted.sort_unstable_by(|a, b| a.total_cmp(b));

    let rank = (f64::from(p) / 100.0 * sorted.len() as f64) as usize;
    // `rank` equals `len` for p == 100 (and exceeds it beyond); clamp to the maximum.
    let last = sorted.len() - 1;
    sorted[rank.min(last)]
}
|
|
|
|
|
|
|
|
/// Renders `value` with two decimal places followed by a space and `unit`,
/// e.g. `format_value(1.234, "ms")` gives `"1.23 ms"`.
fn format_value(value: f64, unit: &'static str) -> String {
    format!("{value:.2} {unit}")
}
|