Report the generation finish reason in chat completion responses.

router/src/lib.rs: inside `impl ChatCompletion`, `finish_reason` is now filled
from `details.finish_reason` (stringified) instead of `None`. `FinishReason`
gains a `Display` impl rendering "length", "eos_token" and "stop_sequence".
The "UNUSED" marker is removed from one `ChatRequest` field doc comment.

router/src/server.rs: inside `chat_completions`, the hard-coded `None` argument
is replaced by the finish reason taken from `stream_token.details` when it is
present, and the serialization-failure message now includes the error value.

NOTE(review): leading whitespace inside the hunks was reconstructed from brace
nesting and rustfmt conventions; confirm against the target files before
applying. The context line `pub messages: Vec,` appears to have lost its
generic argument during extraction; restore it from router/src/lib.rs.

diff --git a/router/src/lib.rs b/router/src/lib.rs
index f648f597..b07b9789 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -244,7 +244,7 @@ impl ChatCompletion {
                     content: ouput,
                 },
                 logprobs: None,
-                finish_reason: None,
+                finish_reason: details.finish_reason.to_string().into(),
             }],
             usage: Usage {
                 prompt_tokens: details.prompt_token_count,
@@ -326,7 +326,6 @@ pub(crate) struct ChatRequest {
     #[serde(default = "default_request_messages")]
     pub messages: Vec,
 
-    /// UNUSED
     /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
     /// decreasing the model's likelihood to repeat the same line verbatim.
     #[serde(default)]
@@ -444,6 +443,16 @@ pub(crate) enum FinishReason {
     StopSequence,
 }
 
+impl std::fmt::Display for FinishReason {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            FinishReason::Length => write!(f, "length"),
+            FinishReason::EndOfSequenceToken => write!(f, "eos_token"),
+            FinishReason::StopSequence => write!(f, "stop_sequence"),
+        }
+    }
+}
+
 #[derive(Serialize, ToSchema)]
 pub(crate) struct BestOfSequence {
     #[schema(example = "test")]
diff --git a/router/src/server.rs b/router/src/server.rs
index 5e2574d4..536be9d3 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -627,10 +627,10 @@ async fn chat_completions(
                     current_time,
                     stream_token.index,
                     None,
-                    None,
+                    stream_token.details.map(|d| d.finish_reason.to_string()),
                 ))
-                .unwrap_or_else(|_| {
-                    println!("Failed to serialize ChatCompletionChunk");
+                .unwrap_or_else(|e| {
+                    println!("Failed to serialize ChatCompletionChunk: {:?}", e);
                     Event::default()
                 })
         };