Tokenization route.

This commit is contained in:
Nicolas Patry 2024-01-23 14:55:29 +01:00
parent 4f7f617e91
commit c12ff38974

View File

@@ -528,11 +528,11 @@ async fn generate_stream_internal(
 /// Generate tokens
 #[utoipa::path(
     post,
-    tag = "Chat completions",
+    tag = "Text Generation Inference",
     path = "/v1/chat/completions",
     request_body = ChatRequest,
     responses(
-        (status = 200, description = "Generated Text", body = GenerateResponse),
+        (status = 200, description = "Generated Text", body = ChatCompletionChunk),
         (status = 424, description = "Generation Error", body = ErrorResponse,
             example = json ! ({"error": "Request failed during generation"})),
         (status = 429, description = "Model is overloaded", body = ErrorResponse,
@@ -675,7 +675,7 @@ async fn chat_completions(
 /// Tokenize inputs
 #[utoipa::path(
     post,
-    tag = "Tokenize",
+    tag = "Text Generation Inference",
     path = "/tokenize",
     request_body = TokenizeRequest,
     responses(
@@ -765,6 +765,8 @@ pub async fn run(
         compat_generate,
         generate,
         generate_stream,
+        chat_completions,
+        tokenize,
         metrics,
     ),
     components(