mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
feat: support FinishReason in streaming and non-streaming chat
This commit is contained in:
parent
8c4ab53780
commit
65c913b55d
@ -244,7 +244,7 @@ impl ChatCompletion {
|
||||
content: ouput,
|
||||
},
|
||||
logprobs: None,
|
||||
finish_reason: None,
|
||||
finish_reason: details.finish_reason.to_string().into(),
|
||||
}],
|
||||
usage: Usage {
|
||||
prompt_tokens: details.prompt_token_count,
|
||||
@ -326,7 +326,6 @@ pub(crate) struct ChatRequest {
|
||||
#[serde(default = "default_request_messages")]
|
||||
pub messages: Vec<Message>,
|
||||
|
||||
/// UNUSED
|
||||
/// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
|
||||
/// decreasing the model's likelihood to repeat the same line verbatim.
|
||||
#[serde(default)]
|
||||
@ -444,6 +443,16 @@ pub(crate) enum FinishReason {
|
||||
StopSequence,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FinishReason {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
FinishReason::Length => write!(f, "length"),
|
||||
FinishReason::EndOfSequenceToken => write!(f, "eos_token"),
|
||||
FinishReason::StopSequence => write!(f, "stop_sequence"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub(crate) struct BestOfSequence {
|
||||
#[schema(example = "test")]
|
||||
|
@ -627,10 +627,10 @@ async fn chat_completions(
|
||||
current_time,
|
||||
stream_token.index,
|
||||
None,
|
||||
None,
|
||||
stream_token.details.map(|d| d.finish_reason.to_string()),
|
||||
))
|
||||
.unwrap_or_else(|_| {
|
||||
println!("Failed to serialize ChatCompletionChunk");
|
||||
.unwrap_or_else(|e| {
|
||||
println!("Failed to serialize ChatCompletionChunk: {:?}", e);
|
||||
Event::default()
|
||||
})
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user