diff --git a/backends/v3/src/lib.rs b/backends/v3/src/lib.rs index 77a9a11a..af66b21e 100644 --- a/backends/v3/src/lib.rs +++ b/backends/v3/src/lib.rs @@ -100,6 +100,7 @@ pub async fn connect_backend( .map_err(V3Error::Warmup)?, )?; tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}"); + metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens); let backend_info = BackendInfo { waiting_served_ratio, diff --git a/router/src/server.rs b/router/src/server.rs index a6b69321..cc896f99 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -1993,8 +1993,6 @@ async fn start( "Batch size of the next batch" ); - metrics::counter!("tgi_max_token_capacity").increment(u64::from(max_batch_total_tokens)); - // CORS layer let allow_origin = allow_origin.unwrap_or(AllowOrigin::any()); let cors_layer = CorsLayer::new()