From c12ff3897417015177631c99914010e821f475cf Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Tue, 23 Jan 2024 14:55:29 +0100
Subject: [PATCH] Tokenization route.

---
 router/src/server.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/router/src/server.rs b/router/src/server.rs
index 365de1ca..42785025 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -528,11 +528,11 @@ async fn generate_stream_internal(
 /// Generate tokens
 #[utoipa::path(
     post,
-    tag = "Chat completions",
+    tag = "Text Generation Inference",
     path = "/v1/chat/completions",
     request_body = ChatRequest,
     responses(
-    (status = 200, description = "Generated Text", body = GenerateResponse),
+    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
@@ -675,7 +675,7 @@ async fn chat_completions(
 /// Tokenize inputs
 #[utoipa::path(
     post,
-    tag = "Tokenize",
+    tag = "Text Generation Inference",
     path = "/tokenize",
     request_body = TokenizeRequest,
     responses(
@@ -765,6 +765,8 @@ pub async fn run(
     compat_generate,
     generate,
     generate_stream,
+    chat_completions,
+    tokenize,
     metrics,
     ),
     components(