From 4555e8721cbb1bd7a0f6e672afef431ac12bc13b Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 11 Jan 2024 12:10:34 -0500 Subject: [PATCH] fix: remove duplicate input_length on Details --- router/src/lib.rs | 6 ++---- router/src/server.rs | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/router/src/lib.rs b/router/src/lib.rs index 9716bde4..9213657b 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -232,9 +232,9 @@ impl ChatCompletion { finish_reason: details.finish_reason.to_string(), }], usage: Usage { - prompt_tokens: details.input_length, + prompt_tokens: details.prefill.len() as u32, completion_tokens: details.generated_tokens, - total_tokens: details.input_length + details.generated_tokens, + total_tokens: details.prefill.len() as u32 + details.generated_tokens, }, } } @@ -469,8 +469,6 @@ pub(crate) struct Details { pub best_of_sequences: Option>, #[serde(skip_serializing_if = "Vec::is_empty")] pub top_tokens: Vec>, - #[schema(example = 1)] - pub input_length: u32, } #[derive(Serialize, ToSchema)] diff --git a/router/src/server.rs b/router/src/server.rs index e1a15c24..edb4f718 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -208,7 +208,6 @@ async fn generate( seed: response.generated_text.seed, best_of_sequences, top_tokens: response.top_tokens, - input_length: response.input_length, }) } false => None, @@ -604,7 +603,7 @@ async fn chat_completions( truncate: None, watermark: false, details: true, - decoder_input_details: false, + decoder_input_details: true, seed, top_n_tokens: None, },