From 7d6216d63b28182720f708b7ec3f34f83169174e Mon Sep 17 00:00:00 2001
From: Lucain
Date: Mon, 15 Apr 2024 16:57:16 +0200
Subject: [PATCH] Update response type for `/v1/chat/completions` and
 `/v1/completions`

`/v1/chat/completions` and `/v1/completions` have different output types
depending on the `stream` parameter. This PR aims to fix the inconsistency
in the auto-generated
[openapi.json](https://huggingface.github.io/text-generation-inference/openapi.json)
specs.

cc @OlivierDehaene @drbh

I reused what had been done for the `/` endpoint but haven't tested anything
myself. Could you confirm this is the correct way of handling things? Also,
should I update the openapi.json file manually? If yes, how can I do it?
---
 router/src/server.rs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/router/src/server.rs b/router/src/server.rs
index b8f93514..4fa4fc6c 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -549,7 +549,11 @@ async fn generate_stream_internal(
     path = "/v1/completions",
     request_body = CompletionRequest,
     responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+        content(
+            ("application/json" = Completion),
+            ("text/event-stream" = CompletionCompleteChunk),
+        )),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
@@ -653,7 +657,7 @@ async fn completions(
                 })
                 .map_or_else(
                     |e| {
-                        println!("Failed to serialize ChatCompletionChunk: {:?}", e);
+                        println!("Failed to serialize CompletionCompleteChunk: {:?}", e);
                         Event::default()
                     },
                     |data| data,
@@ -726,7 +730,11 @@
     path = "/v1/chat/completions",
     request_body = ChatRequest,
     responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+        content(
+            ("application/json" = ChatCompletion),
+            ("text/event-stream" = ChatCompletionChunk),
+        )),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
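
For reference, here is a sketch of what I would expect the regenerated
openapi.json to contain for the `/v1/chat/completions` 200 response after
this change, based on how utoipa maps a `content(...)` attribute onto an
OpenAPI content object; the exact `$ref` targets are assumptions:

```json
"responses": {
  "200": {
    "description": "Generated Chat Completion",
    "content": {
      "application/json": {
        "schema": { "$ref": "#/components/schemas/ChatCompletion" }
      },
      "text/event-stream": {
        "schema": { "$ref": "#/components/schemas/ChatCompletionChunk" }
      }
    }
  }
}
```

Declaring both media types on the same 200 response keeps a single status
code while still distinguishing the non-streamed JSON body from the SSE
stream, mirroring what the `/` endpoint already does.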