This commit is contained in:
OlivierDehaene 2023-03-09 15:11:16 +01:00
parent 8d7a0c1992
commit a448acbfbe
2 changed files with 145 additions and 72 deletions

View File

@ -210,13 +210,62 @@
}, },
"components": { "components": {
"schemas": { "schemas": {
"BestOfSequence": {
"type": "object",
"required": [
"generated_text",
"finish_reason",
"generated_tokens",
"prefill",
"tokens"
],
"properties": {
"finish_reason": {
"$ref": "#/components/schemas/FinishReason"
},
"generated_text": {
"type": "string",
"example": "test"
},
"generated_tokens": {
"type": "integer",
"format": "int32",
"example": 1
},
"prefill": {
"type": "array",
"items": {
"$ref": "#/components/schemas/PrefillToken"
}
},
"seed": {
"type": "integer",
"format": "int64",
"example": 42
},
"tokens": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Token"
}
}
}
},
"Details": { "Details": {
"type": "object", "type": "object",
"required": [ "required": [
"finish_reason", "finish_reason",
"generated_tokens" "generated_tokens",
"prefill",
"tokens"
], ],
"properties": { "properties": {
"best_of_sequences": {
"type": "array",
"items": {
"$ref": "#/components/schemas/BestOfSequence"
}
},
"finish_reason": { "finish_reason": {
"$ref": "#/components/schemas/FinishReason" "$ref": "#/components/schemas/FinishReason"
}, },
@ -247,11 +296,15 @@
"ErrorResponse": { "ErrorResponse": {
"type": "object", "type": "object",
"required": [ "required": [
"error" "error",
"error_type"
], ],
"properties": { "properties": {
"error": { "error": {
"type": "string" "type": "string"
},
"error_type": {
"type": "string"
} }
} }
}, },
@ -266,6 +319,13 @@
"GenerateParameters": { "GenerateParameters": {
"type": "object", "type": "object",
"properties": { "properties": {
"best_of": {
"type": "integer",
"default": "null",
"example": 1,
"nullable": true,
"exclusiveMinimum": 0.0
},
"details": { "details": {
"type": "boolean", "type": "boolean",
"default": "true" "default": "true"
@ -297,7 +357,11 @@
}, },
"seed": { "seed": {
"type": "integer", "type": "integer",
"format": "int64" "format": "int64",
"default": "null",
"example": "null",
"nullable": true,
"exclusiveMinimum": 0.0
}, },
"stop": { "stop": {
"type": "array", "type": "array",
@ -334,6 +398,15 @@
"maximum": 1.0, "maximum": 1.0,
"exclusiveMinimum": 0.0 "exclusiveMinimum": 0.0
}, },
"typical_p": {
"type": "number",
"format": "float",
"default": "null",
"example": 0.95,
"nullable": true,
"maximum": 1.0,
"exclusiveMinimum": 0.0
},
"watermark": { "watermark": {
"type": "boolean", "type": "boolean",
"default": "false", "default": "false",

View File

@ -87,21 +87,21 @@ async fn health(infer: Extension<Infer>) -> Result<(), (StatusCode, Json<ErrorRe
/// Generate tokens /// Generate tokens
#[utoipa::path( #[utoipa::path(
post, post,
tag = "Text Generation Inference", tag = "Text Generation Inference",
path = "/generate", path = "/generate",
request_body = GenerateRequest, request_body = GenerateRequest,
responses( responses(
(status = 200, description = "Generated Text", body = GenerateResponse), (status = 200, description = "Generated Text", body = GenerateResponse),
(status = 424, description = "Generation Error", body = ErrorResponse, (status = 424, description = "Generation Error", body = ErrorResponse,
example = json ! ({"error": "Request failed during generation"})), example = json ! ({"error": "Request failed during generation"})),
(status = 429, description = "Model is overloaded", body = ErrorResponse, (status = 429, description = "Model is overloaded", body = ErrorResponse,
example = json ! ({"error": "Model is overloaded"})), example = json ! ({"error": "Model is overloaded"})),
(status = 422, description = "Input validation error", body = ErrorResponse, (status = 422, description = "Input validation error", body = ErrorResponse,
example = json ! ({"error": "Input validation error"})), example = json ! ({"error": "Input validation error"})),
(status = 500, description = "Incomplete generation", body = ErrorResponse, (status = 500, description = "Incomplete generation", body = ErrorResponse,
example = json ! ({"error": "Incomplete generation"})), example = json ! ({"error": "Incomplete generation"})),
) )
)] )]
#[instrument( #[instrument(
skip(infer), skip(infer),
@ -253,26 +253,26 @@ async fn generate(
/// Generate a stream of token using Server-Sent Events /// Generate a stream of token using Server-Sent Events
#[utoipa::path( #[utoipa::path(
post, post,
tag = "Text Generation Inference", tag = "Text Generation Inference",
path = "/generate_stream", path = "/generate_stream",
request_body = GenerateRequest, request_body = GenerateRequest,
responses( responses(
(status = 200, description = "Generated Text", body = StreamResponse, (status = 200, description = "Generated Text", body = StreamResponse,
content_type = "text/event-stream"), content_type = "text/event-stream"),
(status = 424, description = "Generation Error", body = ErrorResponse, (status = 424, description = "Generation Error", body = ErrorResponse,
example = json ! ({"error": "Request failed during generation"}), example = json ! ({"error": "Request failed during generation"}),
content_type = "text/event-stream"), content_type = "text/event-stream"),
(status = 429, description = "Model is overloaded", body = ErrorResponse, (status = 429, description = "Model is overloaded", body = ErrorResponse,
example = json ! ({"error": "Model is overloaded"}), example = json ! ({"error": "Model is overloaded"}),
content_type = "text/event-stream"), content_type = "text/event-stream"),
(status = 422, description = "Input validation error", body = ErrorResponse, (status = 422, description = "Input validation error", body = ErrorResponse,
example = json ! ({"error": "Input validation error"}), example = json ! ({"error": "Input validation error"}),
content_type = "text/event-stream"), content_type = "text/event-stream"),
(status = 500, description = "Incomplete generation", body = ErrorResponse, (status = 500, description = "Incomplete generation", body = ErrorResponse,
example = json ! ({"error": "Incomplete generation"}), example = json ! ({"error": "Incomplete generation"}),
content_type = "text/event-stream"), content_type = "text/event-stream"),
) )
)] )]
#[instrument( #[instrument(
skip(infer), skip(infer),
@ -434,10 +434,10 @@ async fn generate_stream(
/// Prometheus metrics scrape endpoint /// Prometheus metrics scrape endpoint
#[utoipa::path( #[utoipa::path(
get, get,
tag = "Text Generation Inference", tag = "Text Generation Inference",
path = "/metrics", path = "/metrics",
responses((status = 200, description = "Prometheus Metrics", body = String)) responses((status = 200, description = "Prometheus Metrics", body = String))
)] )]
async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String { async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
prom_handle.render() prom_handle.render()
@ -463,36 +463,36 @@ pub async fn run(
// OpenAPI documentation // OpenAPI documentation
#[derive(OpenApi)] #[derive(OpenApi)]
#[openapi( #[openapi(
paths( paths(
generate, generate,
generate_stream, generate_stream,
metrics, metrics,
), ),
components( components(
schemas( schemas(
GenerateRequest, GenerateRequest,
GenerateParameters, GenerateParameters,
PrefillToken, PrefillToken,
Token, Token,
GenerateResponse, GenerateResponse,
BestOfSequence, BestOfSequence,
Details, Details,
FinishReason, FinishReason,
StreamResponse, StreamResponse,
StreamDetails, StreamDetails,
ErrorResponse, ErrorResponse,
) )
), ),
tags( tags(
(name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API") (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
), ),
info( info(
title = "Text Generation Inference", title = "Text Generation Inference",
license( license(
name = "Apache 2.0", name = "Apache 2.0",
url = "https://www.apache.org/licenses/LICENSE-2.0" url = "https://www.apache.org/licenses/LICENSE-2.0"
) )
) )
)] )]
struct ApiDoc; struct ApiDoc;