diff --git a/router/src/lib.rs b/router/src/lib.rs index 386b0556..a956b058 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -619,7 +619,7 @@ impl ChatCompletion { message, logprobs: return_logprobs .then(|| ChatCompletionLogprobs::from((details.tokens, details.top_tokens))), - finish_reason: details.finish_reason.to_string(), + finish_reason: details.finish_reason.format(true), }], usage: Usage { prompt_tokens: details.prefill.len() as u32, @@ -1117,6 +1117,15 @@ impl std::fmt::Display for FinishReason { } } +impl FinishReason { + pub fn format(&self, use_stop: bool) -> String { + match self { + FinishReason::EndOfSequenceToken if use_stop => "stop".to_string(), + _ => self.to_string(), + } + } +} + #[derive(Serialize, ToSchema)] pub(crate) struct BestOfSequence { #[schema(example = "test")] diff --git a/router/src/server.rs b/router/src/server.rs index 7655182a..4b6fe50c 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -1021,7 +1021,7 @@ async fn completions( total_tokens += details.prefill.len() as u32 + details.generated_tokens; Ok(CompletionComplete { - finish_reason: details.finish_reason.to_string(), + finish_reason: details.finish_reason.format(true), index: index as u32, logprobs: None, text: generation.generated_text, @@ -1212,7 +1212,7 @@ async fn chat_completions( tool_calls, current_time, logprobs, - stream_token.details.map(|d| d.finish_reason.to_string()), + stream_token.details.map(|d| d.finish_reason.format(true)), ), )) .unwrap_or_else(|e| {