From 0204946d26d4790c230c4aa4b35e261fd598fd5f Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 2 Oct 2024 16:32:36 +0200
Subject: [PATCH] Max token capacity metric (#2595)

* adding max_token_capacity_metric

* added tgi to name of metric

* Adding max capacity metric.

* Add description for the metrics

---------

Co-authored-by: Edwinhr716
---
Review note: the gauge is emitted in backends/v3/src/lib.rs under the name
"tgi_batch_max_total_tokens", so the description added in router/src/server.rs
is registered under that same name. It previously used "tgi_batch_total_tokens",
which no hunk in this patch emits, leaving the emitted gauge without a unit or
description. The index hash for the post-image of router/src/server.rs predates
this one-string correction.

 backends/v3/src/lib.rs | 1 +
 router/src/server.rs   | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/backends/v3/src/lib.rs b/backends/v3/src/lib.rs
index 77a9a11ac..af66b21eb 100644
--- a/backends/v3/src/lib.rs
+++ b/backends/v3/src/lib.rs
@@ -100,6 +100,7 @@ pub async fn connect_backend(
             .map_err(V3Error::Warmup)?,
     )?;
     tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
+    metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
 
     let backend_info = BackendInfo {
         waiting_served_ratio,
diff --git a/router/src/server.rs b/router/src/server.rs
index cc896f996..73b543218 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1937,6 +1937,11 @@ async fn start(
         metrics::Unit::Count,
         "Maximum tokens for the current batch"
     );
+    metrics::describe_gauge!(
+        "tgi_batch_max_total_tokens",
+        metrics::Unit::Count,
+        "Maximum amount of tokens in total."
+    );
     metrics::describe_histogram!(
         "tgi_request_max_new_tokens",
         metrics::Unit::Count,