mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
fix: remove duplicate input_length on Details
This commit is contained in:
parent
c63551fad7
commit
4555e8721c
@@ -232,9 +232,9 @@ impl ChatCompletion {
|
||||
finish_reason: details.finish_reason.to_string(),
|
||||
}],
|
||||
usage: Usage {
|
||||
- prompt_tokens: details.input_length,
|
||||
+ prompt_tokens: details.prefill.len() as u32,
|
||||
completion_tokens: details.generated_tokens,
|
||||
- total_tokens: details.input_length + details.generated_tokens,
|
||||
+ total_tokens: details.prefill.len() as u32 + details.generated_tokens,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -469,8 +469,6 @@ pub(crate) struct Details {
|
||||
pub best_of_sequences: Option<Vec<BestOfSequence>>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub top_tokens: Vec<Vec<Token>>,
|
||||
- #[schema(example = 1)]
|
||||
- pub input_length: u32,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
|
@@ -208,7 +208,6 @@ async fn generate(
|
||||
seed: response.generated_text.seed,
|
||||
best_of_sequences,
|
||||
top_tokens: response.top_tokens,
|
||||
- input_length: response.input_length,
|
||||
})
|
||||
}
|
||||
false => None,
|
||||
@@ -604,7 +603,7 @@ async fn chat_completions(
|
||||
truncate: None,
|
||||
watermark: false,
|
||||
details: true,
|
||||
- decoder_input_details: false,
|
||||
+ decoder_input_details: true,
|
||||
seed,
|
||||
top_n_tokens: None,
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user