diff --git a/router/src/lib.rs b/router/src/lib.rs
index 386b0556..a956b058 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -619,7 +619,7 @@ impl ChatCompletion {
                 message,
                 logprobs: return_logprobs
                     .then(|| ChatCompletionLogprobs::from((details.tokens, details.top_tokens))),
-                finish_reason: details.finish_reason.to_string(),
+                finish_reason: details.finish_reason.format(true),
             }],
             usage: Usage {
                 prompt_tokens: details.prefill.len() as u32,
@@ -1117,6 +1117,15 @@ impl std::fmt::Display for FinishReason {
     }
 }
 
+impl FinishReason {
+    /// Renders the reason as text; with `use_stop`, `EndOfSequenceToken` becomes "stop".
+    pub fn format(&self, use_stop: bool) -> String {
+        let is_eos = matches!(self, FinishReason::EndOfSequenceToken);
+        let stop_alias = (use_stop && is_eos).then(|| "stop".to_string());
+        stop_alias.unwrap_or_else(|| self.to_string())
+    }
+}
+
 #[derive(Serialize, ToSchema)]
 pub(crate) struct BestOfSequence {
     #[schema(example = "test")]
diff --git a/router/src/server.rs b/router/src/server.rs
index 7655182a..4b6fe50c 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1021,7 +1021,7 @@ async fn completions(
                 total_tokens += details.prefill.len() as u32 + details.generated_tokens;
 
                 Ok(CompletionComplete {
-                    finish_reason: details.finish_reason.to_string(),
+                    finish_reason: details.finish_reason.format(true),
                     index: index as u32,
                     logprobs: None,
                     text: generation.generated_text,
@@ -1212,7 +1212,7 @@ async fn chat_completions(
                         tool_calls,
                         current_time,
                         logprobs,
-                        stream_token.details.map(|d| d.finish_reason.to_string()),
+                        stream_token.details.map(|d| d.finish_reason.format(true)),
                     ),
                 ))
                 .unwrap_or_else(|e| {