From 65748c7353a193209a48c7aa038452a6c610b1e1 Mon Sep 17 00:00:00 2001
From: Lucain
Date: Tue, 16 Apr 2024 19:26:32 +0200
Subject: [PATCH] Update response type for `/v1/chat/completions` and
 `/v1/completions` (#1747)

`/v1/chat/completions` and `/v1/completions` have different output types
depending on the `stream` parameter. This PR aims to fix the inconsistency
in the auto-generated
[openapi.json](https://huggingface.github.io/text-generation-inference/openapi.json)
specs.

cc @OlivierDehaene @drbh

I reused what had been done for the `/` endpoint but haven't tested
anything myself. Could you confirm this is the correct way of handling
things? Also, should I update the openapi.json file manually? If yes, how
can I do it?
---
 router/src/server.rs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/router/src/server.rs b/router/src/server.rs
index c8dc8359..f92028da 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -550,7 +550,11 @@ async fn generate_stream_internal(
     path = "/v1/completions",
     request_body = CompletionRequest,
     responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+    content(
+            ("application/json" = Completion),
+            ("text/event-stream" = CompletionCompleteChunk),
+    )),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
@@ -654,7 +658,7 @@ async fn completions(
         })
         .map_or_else(
             |e| {
-                println!("Failed to serialize ChatCompletionChunk: {:?}", e);
+                println!("Failed to serialize CompletionCompleteChunk: {:?}", e);
                 Event::default()
             },
             |data| data,
@@ -727,7 +731,11 @@
     path = "/v1/chat/completions",
     request_body = ChatRequest,
     responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+    content(
+            ("application/json" = ChatCompletion),
+            ("text/event-stream" = ChatCompletionChunk),
+    )),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
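
For context, here is a minimal, self-contained sketch of the utoipa `content(...)` pattern the patch relies on. The structs below are simplified placeholders, not the real TGI types, and the handler body is elided; it only illustrates how utoipa attaches one schema per media type under a single 200 status, and how the resulting spec can be dumped programmatically.

```rust
use serde::Serialize;
use utoipa::{OpenApi, ToSchema};

// Placeholder response types: stand-ins for TGI's `Completion` and
// `CompletionCompleteChunk`, reduced to a single field each.
#[derive(Serialize, ToSchema)]
struct Completion {
    /// Full generated text, returned when `stream` is false.
    text: String,
}

#[derive(Serialize, ToSchema)]
struct CompletionCompleteChunk {
    /// One chunk of the SSE stream, returned when `stream` is true.
    token: String,
}

/// `content(...)` documents two media types under the same status code:
/// a plain JSON body and a server-sent-event stream, instead of the
/// single `body = ...` schema used before the patch.
#[utoipa::path(
    post,
    path = "/v1/completions",
    responses(
        (status = 200, description = "Generated Completion",
        content(
            ("application/json" = Completion),
            ("text/event-stream" = CompletionCompleteChunk),
        )),
    )
)]
#[allow(dead_code)]
async fn completions() {}

#[derive(OpenApi)]
#[openapi(
    paths(completions),
    components(schemas(Completion, CompletionCompleteChunk))
)]
struct ApiDoc;

fn main() {
    // Dumping the generated spec shows the 200 response carrying both
    // content types under `responses.200.content`.
    println!("{}", ApiDoc::openapi().to_pretty_json().unwrap());
}
```

On regenerating openapi.json: if the checked-in spec is produced from these annotations, serializing `ApiDoc::openapi()` as above and writing the output over the old file is the generic utoipa approach, though the exact workflow in text-generation-inference may differ.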