Return prompt vs generated tokens.

This commit is contained in:
Nicolas Patry 2024-01-11 14:59:53 +00:00
parent da27fbdfdb
commit 5c8cc964fa

View File

@@ -170,6 +170,7 @@ async fn generate(
}; };
// Token details // Token details
let prompt_tokens = response.prefill.len();
let details = match details { let details = match details {
true => { true => {
// convert best_of_responses // convert best_of_responses
@@ -257,6 +258,11 @@ async fn generate(
"x-time-per-token", "x-time-per-token",
time_per_token.as_millis().to_string().parse().unwrap(), time_per_token.as_millis().to_string().parse().unwrap(),
); );
headers.insert("x-prompt-tokens", prompt_tokens.into());
headers.insert(
"x-generated-tokens",
response.generated_text.generated_tokens.into(),
);
// Metrics // Metrics
metrics::increment_counter!("tgi_request_success"); metrics::increment_counter!("tgi_request_success");