From 967e67111dbce8711d9f43ad657513d56ac52de9 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 2 Oct 2024 16:32:36 +0200 Subject: [PATCH] Max token capacity metric (#2595) * adding max_token_capacity_metric * added tgi to name of metric * Adding max capacity metric. * Add description for the metrics --------- Co-authored-by: Edwinhr716 --- backends/v3/src/lib.rs | 1 + router/src/server.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/backends/v3/src/lib.rs b/backends/v3/src/lib.rs index f3372923..8913e40b 100644 --- a/backends/v3/src/lib.rs +++ b/backends/v3/src/lib.rs @@ -101,6 +101,7 @@ pub async fn connect_backend( .map_err(V3Error::Warmup)?, )?; tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}"); + metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens); let backend_info = BackendInfo { waiting_served_ratio, diff --git a/router/src/server.rs b/router/src/server.rs index cc896f99..73b54321 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -1937,6 +1937,11 @@ async fn start( metrics::Unit::Count, "Maximum tokens for the current batch" ); + metrics::describe_gauge!( + "tgi_batch_max_total_tokens", + metrics::Unit::Count, + "Maximum amount of tokens in total." + ); metrics::describe_histogram!( "tgi_request_max_new_tokens", metrics::Unit::Count,