Adding max capacity metric.

This commit is contained in:
Nicolas Patry 2024-10-01 16:34:13 +02:00
parent 3b0b66bedb
commit 6e7189f2aa
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
2 changed files with 1 addition and 2 deletions

View File

@@ -100,6 +100,7 @@ pub async fn connect_backend(
.map_err(V3Error::Warmup)?,
)?;
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
+ metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
let backend_info = BackendInfo {
waiting_served_ratio,

View File

@@ -1993,8 +1993,6 @@ async fn start(
"Batch size of the next batch"
);
- metrics::counter!("tgi_max_token_capacity").increment(u64::from(max_batch_total_tokens));
// CORS layer
let allow_origin = allow_origin.unwrap_or(AllowOrigin::any());
let cors_layer = CorsLayer::new()