mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
Max token capacity metric (#2595)
* adding max_token_capacity_metric * added tgi to name of metric * Adding max capacity metric. * Add description for the metrics --------- Co-authored-by: Edwinhr716 <Edandres249@gmail.com>
This commit is contained in:
parent
51506aa57a
commit
967e67111d
@@ -101,6 +101,7 @@ pub async fn connect_backend(
         .map_err(V3Error::Warmup)?,
     )?;
     tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
+    metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);

     let backend_info = BackendInfo {
         waiting_served_ratio,
@@ -1937,6 +1937,11 @@ async fn start(
         metrics::Unit::Count,
         "Maximum tokens for the current batch"
     );
+    metrics::describe_gauge!(
+        "tgi_batch_total_tokens",
+        metrics::Unit::Count,
+        "Maximum amount of tokens in total."
+    );
     metrics::describe_histogram!(
         "tgi_request_max_new_tokens",
         metrics::Unit::Count,
Loading…
Reference in New Issue
Block a user