Return prompt vs generated tokens.

This commit is contained in:
Nicolas Patry 2024-01-11 14:59:53 +00:00
parent da27fbdfdb
commit 5c8cc964fa

View File

@@ -170,6 +170,7 @@ async fn generate(
};
// Token details
let prompt_tokens = response.prefill.len();
let details = match details {
true => {
// convert best_of_responses
@@ -257,6 +258,11 @@ async fn generate(
"x-time-per-token",
time_per_token.as_millis().to_string().parse().unwrap(),
);
headers.insert("x-prompt-tokens", prompt_tokens.into());
headers.insert(
"x-generated-tokens",
response.generated_text.generated_tokens.into(),
);
// Metrics
metrics::increment_counter!("tgi_request_success");