Update response type for /v1/chat/completions and /v1/completions

`/v1/chat/completions` and `/v1/completions` return different output types depending on the `stream` parameter: a complete response object when `stream=false`, and a series of server-sent-event chunks when `stream=true`. This PR aims to fix the inconsistency in the auto-generated [openapi.json](https://huggingface.github.io/text-generation-inference/openapi.json) specs, which currently document only the streaming chunk type for both endpoints.
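
For reviewers unfamiliar with the mechanism: utoipa can attach several content types to a single status code via `content(...)`. Below is a minimal, untested sketch of that pattern; the schema types and handler are placeholders for illustration, not TGI's actual code:

```rust
use axum::response::IntoResponse;
use utoipa::ToSchema;

// Placeholder schema for the non-streaming (JSON) response body.
#[derive(serde::Serialize, ToSchema)]
struct Completion {
    text: String,
}

// Placeholder schema for one streaming (SSE) chunk.
#[derive(serde::Serialize, ToSchema)]
struct CompletionCompleteChunk {
    delta: String,
}

// A single 200 response documented with two content types,
// keyed by MIME type instead of a single `body = ...`.
#[utoipa::path(
    post,
    path = "/v1/completions",
    responses(
        (status = 200, description = "Generated Completion",
        content(
            ("application/json" = Completion),
            ("text/event-stream" = CompletionCompleteChunk),
        )),
    )
)]
async fn completions() -> impl IntoResponse {
    // The real handler branches on `stream` and returns either
    // a JSON body or an SSE stream; elided here.
    "..."
}
```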

cc @OlivierDehaene @drbh. I reused what had been done for the `/` endpoint but haven't tested anything myself. Could you confirm this is the correct way of handling things?

Also, should I update the openapi.json file manually? If yes, how can I do it?
Commit 7d6216d63b (parent c38a7d7ddd)
Lucain, 2024-04-15 16:57:16 +02:00, committed by GitHub

```diff
@@ -549,7 +549,11 @@ async fn generate_stream_internal(
 path = "/v1/completions",
 request_body = CompletionRequest,
 responses(
-(status = 200, description = "Generated Text", body = ChatCompletionChunk),
+(status = 200, description = "Generated Chat Completion",
+content(
+("application/json" = Completion),
+("text/event-stream" = CompletionCompleteChunk),
+)),
 (status = 424, description = "Generation Error", body = ErrorResponse,
 example = json ! ({"error": "Request failed during generation"})),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
@@ -653,7 +657,7 @@ async fn completions(
 })
 .map_or_else(
 |e| {
-println!("Failed to serialize ChatCompletionChunk: {:?}", e);
+println!("Failed to serialize CompletionCompleteChunk: {:?}", e);
 Event::default()
 },
 |data| data,
@@ -726,7 +730,11 @@ async fn completions(
 path = "/v1/chat/completions",
 request_body = ChatRequest,
 responses(
-(status = 200, description = "Generated Text", body = ChatCompletionChunk),
+(status = 200, description = "Generated Chat Completion",
+content(
+("application/json" = ChatCompletion),
+("text/event-stream" = ChatCompletionChunk),
+)),
 (status = 424, description = "Generation Error", body = ErrorResponse,
 example = json ! ({"error": "Request failed during generation"})),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
```
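
To see the dual behavior these annotations now document, a client can branch on the response `Content-Type` header. A minimal sketch, assuming `reqwest` with its `json` feature enabled and an OpenAI-style request body (field names are illustrative, not taken from this PR):

```rust
use reqwest::header::CONTENT_TYPE;

async fn call_completions(base: &str, stream: bool) -> Result<(), reqwest::Error> {
    let resp = reqwest::Client::new()
        .post(format!("{base}/v1/completions"))
        // Illustrative body; the actual CompletionRequest schema may differ.
        .json(&serde_json::json!({ "prompt": "Hello", "stream": stream }))
        .send()
        .await?;

    let content_type = resp
        .headers()
        .get(CONTENT_TYPE)
        .and_then(|v| v.to_str().ok())
        .unwrap_or("");

    if content_type.starts_with("text/event-stream") {
        // `stream=true`: the body is a sequence of SSE `data:` events,
        // each carrying one serialized chunk (CompletionCompleteChunk).
    } else {
        // `stream=false`: the body is a single JSON object (Completion).
    }
    Ok(())
}
```

This mirrors the two content types declared under the 200 status above: one `stream` flag, two response shapes, now both visible in the generated spec.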