add metrics

2025-09-10 11:54:52 +00:00 · 2023-04-19 18:39:44 +02:00 · 2023-04-19 18:39:44 +02:00 · 521f6203d1
commit 521f6203d1
parent ca98470cff
2 changed files with 7 additions and 2 deletions
--- a/router/src/infer.rs
+++ b/router/src/infer.rs
@@ -446,7 +446,10 @@ fn filter_send_generations(generations: Vec<Generation>, entries: &mut IntMap<u6
        // Send generation responses back to the infer task
        // If the receive an error from the Flume channel, it means that the client dropped the
        // request and we need to stop generating hence why we unwrap_or(true)
-        let stopped = send_responses(generation, entry).unwrap_or(true);
+        let stopped = send_responses(generation, entry).map_err(|err| {
+            metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
+            err
+        }).unwrap_or(true);
        if stopped {
            entries.remove(&id).expect("ID not found in entries. This is a bug.");
        }
--- a/router/src/queue.rs
+++ b/router/src/queue.rs
@@ -161,6 +161,7 @@ impl State {
            // Filter entries where the response receiver was dropped (== entries where the request
            // was dropped by the client)
            if entry.response_tx.is_disconnected() {
+                metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
                continue;
            }

@@ -190,6 +191,8 @@ impl State {
            }
        }

+        metrics::gauge!("tgi_queue_size", self.entries.len() as f64);
+
        // Maybe all entries were dropped because their channel were closed
        if batch_requests.is_empty() {
            return None;
@@ -207,7 +210,6 @@ impl State {
        // Increment batch id
        self.next_batch_id += 1;

-        metrics::gauge!("tgi_queue_size", self.entries.len() as f64);
        metrics::histogram!("tgi_batch_next_size", batch.size as f64);
        Some((batch_entries, batch, next_batch_span))
    }