fmt

2025-09-09 11:24:53 +00:00 · 2023-03-09 15:11:16 +01:00 · 2023-03-09 15:11:16 +01:00 · a448acbfbe
commit a448acbfbe
parent 8d7a0c1992
2 changed files with 145 additions and 72 deletions
--- a/docs/openapi.json
+++ b/docs/openapi.json
@ -210,13 +210,62 @@
  },
  "components": {
    "schemas": {
+      "BestOfSequence": {
+        "type": "object",
+        "required": [
+          "generated_text",
+          "finish_reason",
+          "generated_tokens",
+          "prefill",
+          "tokens"
+        ],
+        "properties": {
+          "finish_reason": {
+            "$ref": "#/components/schemas/FinishReason"
+          },
+          "generated_text": {
+            "type": "string",
+            "example": "test"
+          },
+          "generated_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "example": 1
+          },
+          "prefill": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/PrefillToken"
+            }
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42
+          },
+          "tokens": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Token"
+            }
+          }
+        }
+      },
      "Details": {
        "type": "object",
        "required": [
          "finish_reason",
-          "generated_tokens"
+          "generated_tokens",
+          "prefill",
+          "tokens"
        ],
        "properties": {
+          "best_of_sequences": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/BestOfSequence"
+            }
+          },
          "finish_reason": {
            "$ref": "#/components/schemas/FinishReason"
          },
@ -247,11 +296,15 @@
      "ErrorResponse": {
        "type": "object",
        "required": [
-          "error"
+          "error",
+          "error_type"
        ],
        "properties": {
          "error": {
            "type": "string"
+          },
+          "error_type": {
+            "type": "string"
          }
        }
      },
@ -266,6 +319,13 @@
      "GenerateParameters": {
        "type": "object",
        "properties": {
+          "best_of": {
+            "type": "integer",
+            "default": "null",
+            "example": 1,
+            "nullable": true,
+            "exclusiveMinimum": 0.0
+          },
          "details": {
            "type": "boolean",
            "default": "true"
@ -297,7 +357,11 @@
          },
          "seed": {
            "type": "integer",
-            "format": "int64"
+            "format": "int64",
+            "default": "null",
+            "example": "null",
+            "nullable": true,
+            "exclusiveMinimum": 0.0
          },
          "stop": {
            "type": "array",
@ -334,6 +398,15 @@
            "maximum": 1.0,
            "exclusiveMinimum": 0.0
          },
+          "typical_p": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 0.95,
+            "nullable": true,
+            "maximum": 1.0,
+            "exclusiveMinimum": 0.0
+          },
          "watermark": {
            "type": "boolean",
            "default": "false",
--- a/router/src/server.rs
+++ b/router/src/server.rs
@ -87,21 +87,21 @@ async fn health(infer: Extension<Infer>) -> Result<(), (StatusCode, Json<ErrorRe

 /// Generate tokens
 #[utoipa::path(
-post,
-tag = "Text Generation Inference",
-path = "/generate",
-request_body = GenerateRequest,
-responses(
-(status = 200, description = "Generated Text", body = GenerateResponse),
-(status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"})),
-(status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"})),
-(status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"})),
-(status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"})),
-)
+    post,
+    tag = "Text Generation Inference",
+    path = "/generate",
+    request_body = GenerateRequest,
+    responses(
+        (status = 200, description = "Generated Text", body = GenerateResponse),
+        (status = 424, description = "Generation Error", body = ErrorResponse,
+            example = json ! ({"error": "Request failed during generation"})),
+        (status = 429, description = "Model is overloaded", body = ErrorResponse,
+            example = json ! ({"error": "Model is overloaded"})),
+        (status = 422, description = "Input validation error", body = ErrorResponse,
+            example = json ! ({"error": "Input validation error"})),
+        (status = 500, description = "Incomplete generation", body = ErrorResponse,
+            example = json ! ({"error": "Incomplete generation"})),
+    )
 )]
 #[instrument(
    skip(infer),
@ -253,26 +253,26 @@ async fn generate(

 /// Generate a stream of token using Server-Sent Events
 #[utoipa::path(
-post,
-tag = "Text Generation Inference",
-path = "/generate_stream",
-request_body = GenerateRequest,
-responses(
-(status = 200, description = "Generated Text", body = StreamResponse,
-content_type = "text/event-stream"),
-(status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"}),
-content_type = "text/event-stream"),
-(status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"}),
-content_type = "text/event-stream"),
-(status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"}),
-content_type = "text/event-stream"),
-(status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"}),
-content_type = "text/event-stream"),
-)
+    post,
+    tag = "Text Generation Inference",
+    path = "/generate_stream",
+    request_body = GenerateRequest,
+    responses(
+        (status = 200, description = "Generated Text", body = StreamResponse,
+            content_type = "text/event-stream"),
+        (status = 424, description = "Generation Error", body = ErrorResponse,
+            example = json ! ({"error": "Request failed during generation"}),
+            content_type = "text/event-stream"),
+        (status = 429, description = "Model is overloaded", body = ErrorResponse,
+            example = json ! ({"error": "Model is overloaded"}),
+            content_type = "text/event-stream"),
+        (status = 422, description = "Input validation error", body = ErrorResponse,
+            example = json ! ({"error": "Input validation error"}),
+            content_type = "text/event-stream"),
+        (status = 500, description = "Incomplete generation", body = ErrorResponse,
+            example = json ! ({"error": "Incomplete generation"}),
+            content_type = "text/event-stream"),
+    )
 )]
 #[instrument(
    skip(infer),
@ -434,10 +434,10 @@ async fn generate_stream(

 /// Prometheus metrics scrape endpoint
 #[utoipa::path(
-get,
-tag = "Text Generation Inference",
-path = "/metrics",
-responses((status = 200, description = "Prometheus Metrics", body = String))
+    get,
+    tag = "Text Generation Inference",
+    path = "/metrics",
+    responses((status = 200, description = "Prometheus Metrics", body = String))
 )]
 async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
    prom_handle.render()
@ -463,36 +463,36 @@ pub async fn run(
    // OpenAPI documentation
    #[derive(OpenApi)]
    #[openapi(
-    paths(
-    generate,
-    generate_stream,
-    metrics,
-    ),
-    components(
-    schemas(
-    GenerateRequest,
-    GenerateParameters,
-    PrefillToken,
-    Token,
-    GenerateResponse,
-    BestOfSequence,
-    Details,
-    FinishReason,
-    StreamResponse,
-    StreamDetails,
-    ErrorResponse,
-    )
-    ),
-    tags(
-    (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
-    ),
-    info(
-    title = "Text Generation Inference",
-    license(
-    name = "Apache 2.0",
-    url = "https://www.apache.org/licenses/LICENSE-2.0"
-    )
-    )
+        paths(
+            generate,
+            generate_stream,
+            metrics,
+        ),
+        components(
+            schemas(
+                GenerateRequest,
+                GenerateParameters,
+                PrefillToken,
+                Token,
+                GenerateResponse,
+                BestOfSequence,
+                Details,
+                FinishReason,
+                StreamResponse,
+                StreamDetails,
+                ErrorResponse,
+            )
+        ),
+        tags(
+            (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
+        ),
+        info(
+            title = "Text Generation Inference",
+            license(
+                name = "Apache 2.0",
+                url = "https://www.apache.org/licenses/LICENSE-2.0"
+            )
+        )
    )]
    struct ApiDoc;