Mirror of https://github.com/huggingface/text-generation-inference.git
feat: support FinishReason in streaming and non-streaming chat

parent: 8c4ab53780
commit: 65c913b55d
@@ -244,7 +244,7 @@ impl ChatCompletion {
                     content: ouput,
                 },
                 logprobs: None,
-                finish_reason: None,
+                finish_reason: details.finish_reason.to_string().into(),
             }],
             usage: Usage {
                 prompt_tokens: details.prompt_token_count,
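In the non-streaming path, `finish_reason` was previously hard-coded to `None`. The new expression renders the `FinishReason` enum to a `String` (via the `Display` impl added further down in this commit) and then lifts it into the field's `Option` through the standard library's blanket `impl<T> From<T> for Option<T>`. A minimal sketch of that `.into()` lift, assuming the field is typed `Option<String>`:

    // Sketch of the Option lift behind `finish_reason: ....to_string().into()`.
    // The std blanket impl `From<T> for Option<T>` wraps the String in Some(..).
    fn main() {
        let s: String = "stop_sequence".to_string();
        let finish_reason: Option<String> = s.into();
        assert_eq!(finish_reason, Some("stop_sequence".to_string()));
    }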
@@ -326,7 +326,6 @@ pub(crate) struct ChatRequest {
     #[serde(default = "default_request_messages")]
     pub messages: Vec<Message>,
 
-    /// UNUSED
     /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
     /// decreasing the model's likelihood to repeat the same line verbatim.
     #[serde(default)]
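For context, the `#[serde(default = "default_request_messages")]` attribute retained above tells serde to call a free function when the `messages` key is absent from the request body. A self-contained sketch of that pattern, assuming serde (with the derive feature) and serde_json are available; the `Message` struct and default value here are simplified stand-ins, not the router's real types:

    use serde::Deserialize;

    // Simplified stand-in for the router's Message type.
    #[derive(Deserialize, Debug)]
    struct Message {
        role: String,
        content: String,
    }

    // Free function named by the serde attribute; called when the field is missing.
    fn default_request_messages() -> Vec<Message> {
        vec![Message {
            role: "user".to_string(),
            content: "Hello".to_string(),
        }]
    }

    #[derive(Deserialize, Debug)]
    struct ChatRequest {
        #[serde(default = "default_request_messages")]
        messages: Vec<Message>,
    }

    fn main() {
        // With no "messages" key, serde falls back to default_request_messages().
        let req: ChatRequest = serde_json::from_str("{}").unwrap();
        assert_eq!(req.messages[0].role, "user");
    }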
@@ -444,6 +443,16 @@ pub(crate) enum FinishReason {
     StopSequence,
 }
 
+impl std::fmt::Display for FinishReason {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            FinishReason::Length => write!(f, "length"),
+            FinishReason::EndOfSequenceToken => write!(f, "eos_token"),
+            FinishReason::StopSequence => write!(f, "stop_sequence"),
+        }
+    }
+}
+
 #[derive(Serialize, ToSchema)]
 pub(crate) struct BestOfSequence {
     #[schema(example = "test")]
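This `Display` impl is what makes the `.to_string()` calls in both response paths work: the standard library's blanket `impl<T: fmt::Display> ToString for T` derives `to_string()` from `fmt`. A quick runnable check of the mapping, using a copy of the enum reduced from the hunk above (the real enum also carries serde/ToSchema derives, omitted here):

    use std::fmt;

    // Reduced copy of the router's FinishReason enum for a standalone check.
    enum FinishReason {
        Length,
        EndOfSequenceToken,
        StopSequence,
    }

    impl fmt::Display for FinishReason {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            match self {
                FinishReason::Length => write!(f, "length"),
                FinishReason::EndOfSequenceToken => write!(f, "eos_token"),
                FinishReason::StopSequence => write!(f, "stop_sequence"),
            }
        }
    }

    fn main() {
        // The blanket `impl<T: Display> ToString for T` provides to_string().
        assert_eq!(FinishReason::Length.to_string(), "length");
        assert_eq!(FinishReason::EndOfSequenceToken.to_string(), "eos_token");
        assert_eq!(FinishReason::StopSequence.to_string(), "stop_sequence");
    }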
@@ -627,10 +627,10 @@ async fn chat_completions(
                     current_time,
                     stream_token.index,
                     None,
-                    None,
+                    stream_token.details.map(|d| d.finish_reason.to_string()),
                 ))
-                .unwrap_or_else(|_| {
-                    println!("Failed to serialize ChatCompletionChunk");
+                .unwrap_or_else(|e| {
+                    println!("Failed to serialize ChatCompletionChunk: {:?}", e);
                     Event::default()
                 })
             };
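In the streaming path, `stream_token.details` is only populated on the final token, so `Option::map` yields `None` for intermediate chunks and a `Some("length")`-style string at the end; the rewritten `unwrap_or_else(|e| ...)` closure also surfaces the serialization error instead of swallowing it. A sketch of the mapping behavior, with a simplified `Details` stand-in for the router's stream details type:

    // Simplified stand-in: the real type holds a FinishReason enum, not a &str.
    struct Details {
        finish_reason: &'static str,
    }

    fn finish_reason_for(details: Option<Details>) -> Option<String> {
        // Intermediate chunks carry no details -> None; the final chunk maps
        // its finish reason through to the ChatCompletionChunk.
        details.map(|d| d.finish_reason.to_string())
    }

    fn main() {
        assert_eq!(finish_reason_for(None), None);
        assert_eq!(
            finish_reason_for(Some(Details { finish_reason: "eos_token" })),
            Some("eos_token".to_string())
        );
    }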