From 65748c7353a193209a48c7aa038452a6c610b1e1 Mon Sep 17 00:00:00 2001
From: Lucain
Date: Tue, 16 Apr 2024 19:26:32 +0200
Subject: [PATCH] Update response type for `/v1/chat/completions` and
 `/v1/completions` (#1747)

`/v1/chat/completions` and `/v1/completions` have different output types
depending on the `stream` parameter. This PR aims to fix the inconsistency
in the auto-generated
[openapi.json](https://huggingface.github.io/text-generation-inference/openapi.json)
specs.

cc @OlivierDehaene @drbh

I reused what had been done for the `/` endpoint but haven't tested
anything myself. Could you confirm this is the correct way of handling
things? Also, should I update the openapi.json file manually? If yes, how
can I do it?
---
 router/src/server.rs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/router/src/server.rs b/router/src/server.rs
index c8dc8359..f92028da 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -550,7 +550,11 @@ async fn generate_stream_internal(
     path = "/v1/completions",
     request_body = CompletionRequest,
     responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+    content(
+            ("application/json" = Completion),
+            ("text/event-stream" = CompletionCompleteChunk),
+    )),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
@@ -654,7 +658,7 @@ async fn completions(
         })
         .map_or_else(
             |e| {
-                println!("Failed to serialize ChatCompletionChunk: {:?}", e);
+                println!("Failed to serialize CompletionCompleteChunk: {:?}", e);
                 Event::default()
             },
             |data| data,
@@ -727,7 +731,11 @@
     path = "/v1/chat/completions",
     request_body = ChatRequest,
     responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+    content(
+            ("application/json" = ChatCompletion),
+            ("text/event-stream" = ChatCompletionChunk),
+    )),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
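
For context, here is a minimal, self-contained sketch of the utoipa `content(...)` pattern the patch relies on. The structs below are simplified placeholders, not the real TGI types, and the handler body is elided; it only illustrates how utoipa attaches one schema per media type under a single 200 status, and how the resulting spec can be dumped programmatically.

```rust
use serde::Serialize;
use utoipa::{OpenApi, ToSchema};

// Placeholder response types: stand-ins for TGI's `Completion` and
// `CompletionCompleteChunk`, reduced to a single field each.
#[derive(Serialize, ToSchema)]
struct Completion {
    /// Full generated text, returned when `stream` is false.
    text: String,
}

#[derive(Serialize, ToSchema)]
struct CompletionCompleteChunk {
    /// One chunk of the SSE stream, returned when `stream` is true.
    token: String,
}

/// `content(...)` documents two media types under the same status code:
/// a plain JSON body and a server-sent-event stream, instead of the
/// single `body = ...` schema used before the patch.
#[utoipa::path(
    post,
    path = "/v1/completions",
    responses(
        (status = 200, description = "Generated Completion",
        content(
            ("application/json" = Completion),
            ("text/event-stream" = CompletionCompleteChunk),
        )),
    )
)]
#[allow(dead_code)]
async fn completions() {}

#[derive(OpenApi)]
#[openapi(
    paths(completions),
    components(schemas(Completion, CompletionCompleteChunk))
)]
struct ApiDoc;

fn main() {
    // Dumping the generated spec shows the 200 response carrying both
    // content types under `responses.200.content`.
    println!("{}", ApiDoc::openapi().to_pretty_json().unwrap());
}
```

On regenerating openapi.json: if the checked-in spec is produced from these annotations, serializing `ApiDoc::openapi()` as above and writing the output over the old file is the generic utoipa approach, though the exact workflow in text-generation-inference may differ.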