diff --git a/backends/v3/src/lib.rs b/backends/v3/src/lib.rs
index f3372923..8913e40b 100644
--- a/backends/v3/src/lib.rs
+++ b/backends/v3/src/lib.rs
@@ -101,6 +101,7 @@ pub async fn connect_backend(
             .map_err(V3Error::Warmup)?,
     )?;
     tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
+    metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
 
     let backend_info = BackendInfo {
         waiting_served_ratio,
diff --git a/router/src/server.rs b/router/src/server.rs
index cc896f99..73b54321 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1937,6 +1937,12 @@ async fn start(
         metrics::Unit::Count,
         "Maximum tokens for the current batch"
     );
+    // Keep this name identical to the gauge set in backends/v3 `connect_backend`.
+    metrics::describe_gauge!(
+        "tgi_batch_max_total_tokens",
+        metrics::Unit::Count,
+        "Maximum amount of tokens in total."
+    );
     metrics::describe_histogram!(
         "tgi_request_max_new_tokens",
         metrics::Unit::Count,