From 5c8cc964fae2b816aa234f34a984781a2af9efaf Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 11 Jan 2024 14:59:53 +0000 Subject: [PATCH] Return prompt vs generated tokens. --- router/src/server.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/router/src/server.rs b/router/src/server.rs index fe1b8309..ef1f1cea 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -170,6 +170,7 @@ async fn generate( }; // Token details + let prompt_tokens = response.prefill.len(); let details = match details { true => { // convert best_of_responses @@ -257,6 +258,11 @@ async fn generate( "x-time-per-token", time_per_token.as_millis().to_string().parse().unwrap(), ); + headers.insert("x-prompt-tokens", prompt_tokens.into()); + headers.insert( + "x-generated-tokens", + response.generated_text.generated_tokens.into(), + ); // Metrics metrics::increment_counter!("tgi_request_success");