mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
Max token capacity metric (#2595)
* adding max_token_capacity_metric * added tgi to name of metric * Adding max capacity metric. * Add description for the metrics --------- Co-authored-by: Edwinhr716 <Edandres249@gmail.com>
This commit is contained in:
parent
51506aa57a
commit
967e67111d
@@ -101,6 +101,7 @@ pub async fn connect_backend(
         .map_err(V3Error::Warmup)?,
     )?;
     tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
+    metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);

     let backend_info = BackendInfo {
         waiting_served_ratio,
@@ -1937,6 +1937,11 @@ async fn start(
         metrics::Unit::Count,
         "Maximum tokens for the current batch"
     );
+    metrics::describe_gauge!(
+        "tgi_batch_total_tokens",
+        metrics::Unit::Count,
+        "Maximum amount of tokens in total."
+    );
     metrics::describe_histogram!(
         "tgi_request_max_new_tokens",
         metrics::Unit::Count,
Loading…
Reference in New Issue
Block a user