From 4d00990ccde20c5f7f50a4b6eb7b34e551b2625f Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Thu, 2 Feb 2023 18:58:11 +0100
Subject: [PATCH] host swagger w/ github pages

---
 docs/index.html   |  30 ++++
 docs/openapi.json | 446 ++++++++++++++++++++++++++++++++++++++++++++++
 router/src/lib.rs |  11 +-
 3 files changed, 482 insertions(+), 5 deletions(-)
 create mode 100644 docs/index.html
 create mode 100644 docs/openapi.json
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 00000000..e00e7446
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,30 @@
+<html>
+    <head>
+        <!-- Load the latest Swagger UI code and style from npm using unpkg.com -->
+        <script src="https://unpkg.com/swagger-ui-dist@3/swagger-ui-bundle.js"></script>
+        <link rel="stylesheet" type="text/css" href="https://unpkg.com/swagger-ui-dist@3/swagger-ui.css"/>
+        <title>My New API</title>
+    </head>
+    <body>
+        <div id="swagger-ui"></div> <!-- Div to hold the UI component -->
+        <script>
+            window.onload = function () {
+                // Begin Swagger UI call region
+                const ui = SwaggerUIBundle({
+                    url: "openapi.json", //Location of Open API spec in the repo
+                    dom_id: '#swagger-ui',
+                    deepLinking: true,
+                    supportedSubmitMethods: [],
+                    presets: [
+                        SwaggerUIBundle.presets.apis,
+                        SwaggerUIBundle.SwaggerUIStandalonePreset
+                    ],
+                    plugins: [
+                        SwaggerUIBundle.plugins.DownloadUrl
+                    ],
+                })
+                window.ui = ui
+            }
+        </script>
+    </body>
+</html>
\ No newline at end of file
diff --git a/docs/openapi.json b/docs/openapi.json
new file mode 100644
index 00000000..d885b97f
--- /dev/null
+++ b/docs/openapi.json
@@ -0,0 +1,446 @@
+{
+  "openapi": "3.0.3",
+  "info": {
+    "title": "Text Generation Inference",
+    "description": "Text Generation Webserver",
+    "contact": {
+      "name": "Olivier Dehaene",
+      "email": ""
+    },
+    "license": {
+      "name": "Apache 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    },
+    "version": "0.1.0"
+  },
+  "paths": {
+    "/generate": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate tokens",
+        "description": "Generate tokens",
+        "operationId": "generate",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/GenerateRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/GenerateResponse"
+                  }
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        },
+        "deprecated": false
+      }
+    },
+    "/generate_stream": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate a stream of token using Server Side Events",
+        "description": "Generate a stream of token using Server Side Events",
+        "operationId": "generate_stream",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/GenerateRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "text/event-stream ": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/StreamResponse"
+                  }
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "text/event-stream ": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "text/event-stream ": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "text/event-stream ": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "text/event-stream ": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/ErrorResponse"
+                  }
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        },
+        "deprecated": false
+      }
+    }
+  },
+  "components": {
+    "schemas": {
+      "Details": {
+        "type": "object",
+        "required": [
+          "finish_reason",
+          "generated_tokens"
+        ],
+        "properties": {
+          "finish_reason": {
+            "$ref": "#/components/schemas/FinishReason"
+          },
+          "generated_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "example": 1
+          },
+          "prefill": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Token"
+            }
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42
+          },
+          "tokens": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Token"
+            }
+          }
+        }
+      },
+      "ErrorResponse": {
+        "type": "object",
+        "required": [
+          "error"
+        ],
+        "properties": {
+          "error": {
+            "type": "string"
+          }
+        }
+      },
+      "FinishReason": {
+        "type": "string",
+        "enum": [
+          "length",
+          "eos_token",
+          "stop_sequence"
+        ]
+      },
+      "GenerateParameters": {
+        "type": "object",
+        "properties": {
+          "details": {
+            "type": "boolean",
+            "default": "true"
+          },
+          "do_sample": {
+            "type": "boolean",
+            "default": "false",
+            "example": true
+          },
+          "max_new_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "default": "20",
+            "exclusiveMaximum": 512.0,
+            "exclusiveMinimum": 0.0
+          },
+          "repetition_penalty": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 1.03,
+            "nullable": true,
+            "exclusiveMinimum": 0.0
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64"
+          },
+          "stop": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "example": [
+              "photographer"
+            ],
+            "maxItems": 4
+          },
+          "temperature": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 0.5,
+            "nullable": true,
+            "exclusiveMinimum": 0.0
+          },
+          "top_k": {
+            "type": "integer",
+            "format": "int32",
+            "default": "null",
+            "example": 10,
+            "nullable": true,
+            "exclusiveMinimum": 0.0
+          },
+          "top_p": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 0.95,
+            "nullable": true,
+            "maximum": 1.0,
+            "exclusiveMinimum": 0.0
+          }
+        }
+      },
+      "GenerateRequest": {
+        "type": "object",
+        "required": [
+          "inputs"
+        ],
+        "properties": {
+          "inputs": {
+            "type": "string",
+            "example": "My name is Olivier and I"
+          },
+          "parameters": {
+            "$ref": "#/components/schemas/GenerateParameters"
+          }
+        }
+      },
+      "GenerateResponse": {
+        "type": "object",
+        "required": [
+          "generated_text"
+        ],
+        "properties": {
+          "details": {
+            "$ref": "#/components/schemas/Details"
+          },
+          "generated_text": {
+            "type": "string",
+            "example": "test"
+          }
+        }
+      },
+      "StreamDetails": {
+        "type": "object",
+        "required": [
+          "finish_reason",
+          "generated_tokens"
+        ],
+        "properties": {
+          "finish_reason": {
+            "$ref": "#/components/schemas/FinishReason"
+          },
+          "generated_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "example": 1
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42
+          }
+        }
+      },
+      "StreamResponse": {
+        "type": "object",
+        "required": [
+          "token"
+        ],
+        "properties": {
+          "details": {
+            "$ref": "#/components/schemas/StreamDetails"
+          },
+          "generated_text": {
+            "type": "string",
+            "default": "null",
+            "example": "test",
+            "nullable": true
+          },
+          "token": {
+            "$ref": "#/components/schemas/Token"
+          }
+        }
+      },
+      "Token": {
+        "type": "object",
+        "required": [
+          "id",
+          "text",
+          "logprob"
+        ],
+        "properties": {
+          "id": {
+            "type": "integer",
+            "format": "int32",
+            "example": 0
+          },
+          "logprob": {
+            "type": "number",
+            "format": "float",
+            "example": -0.34,
+            "nullable": true
+          },
+          "text": {
+            "type": "string",
+            "example": "test"
+          }
+        }
+      }
+    }
+  },
+  "tags": [
+    {
+      "name": "Text Generation Inference",
+      "description": "Hugging Face Text Generation Inference API"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/router/src/lib.rs b/router/src/lib.rs
index c88cae1d..a545f65e 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -13,24 +13,25 @@ use validation::Validation;
 #[derive(Clone, Debug, Deserialize, ToSchema)]
 pub(crate) struct GenerateParameters {
     #[serde(default)]
-    #[schema(exclusive_minimum = 0.0, nullable = true, default = "null")]
+    #[schema(exclusive_minimum = 0.0, nullable = true, default = "null", example = 0.5)]
     pub temperature: Option<f32>,
     #[serde(default)]
-    #[schema(exclusive_minimum = 0.0, nullable = true, default = "null")]
+    #[schema(exclusive_minimum = 0.0, nullable = true, default = "null", example = 1.03)]
     pub repetition_penalty: Option<f32>,
     #[serde(default)]
-    #[schema(exclusive_minimum = 0, nullable = true, default = "null")]
+    #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
     pub top_k: Option<i32>,
     #[serde(default)]
     #[schema(
         exclusive_minimum = 0.0,
         maximum = 1.0,
         nullable = true,
-        default = "null"
+        default = "null",
+        example = 0.95
     )]
     pub top_p: Option<f32>,
     #[serde(default = "default_do_sample")]
-    #[schema(default = "false")]
+    #[schema(default = "false", example = true)]
     pub do_sample: bool,
     #[serde(default = "default_max_new_tokens")]
     #[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")]