Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-11 04:14:52 +00:00
Return prompt vs generated tokens.
This commit is contained in:
parent da27fbdfdb
commit 5c8cc964fa
@@ -170,6 +170,7 @@ async fn generate(
     };

     // Token details
+    let prompt_tokens = response.prefill.len();
     let details = match details {
         true => {
             // convert best_of_responses
@@ -257,6 +258,11 @@ async fn generate(
         "x-time-per-token",
         time_per_token.as_millis().to_string().parse().unwrap(),
     );
+    headers.insert("x-prompt-tokens", prompt_tokens.into());
+    headers.insert(
+        "x-generated-tokens",
+        response.generated_text.generated_tokens.into(),
+    );

     // Metrics
     metrics::increment_counter!("tgi_request_success");
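With this change, the router reports the prompt token count and the generated token count as separate response headers (x-prompt-tokens and x-generated-tokens). A minimal client-side sketch of reading those headers, assuming the reqwest (with the "json" feature), serde_json, and tokio crates, and a placeholder endpoint URL for a locally running instance:

// Sketch only: reads the x-prompt-tokens / x-generated-tokens headers
// added by this commit from a /generate response. The URL and request
// payload shape are assumptions for illustration.
use reqwest::Client;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();
    let response = client
        .post("http://localhost:8080/generate") // placeholder URL
        .json(&serde_json::json!({
            "inputs": "What is Deep Learning?",
            "parameters": { "max_new_tokens": 20 }
        }))
        .send()
        .await?;

    // Prompt vs generated token counts, taken from the new headers.
    let prompt_tokens = response
        .headers()
        .get("x-prompt-tokens")
        .and_then(|v| v.to_str().ok())
        .unwrap_or("?")
        .to_owned();
    let generated_tokens = response
        .headers()
        .get("x-generated-tokens")
        .and_then(|v| v.to_str().ok())
        .unwrap_or("?")
        .to_owned();

    println!("prompt tokens: {prompt_tokens}, generated tokens: {generated_tokens}");
    println!("body: {}", response.text().await?);
    Ok(())
}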