diff --git a/.redocly.lint-ignore.yaml b/.redocly.lint-ignore.yaml
new file mode 100644
index 00000000..382c9ab6
--- /dev/null
+++ b/.redocly.lint-ignore.yaml
@@ -0,0 +1,79 @@
+# This file tells Redocly's linter which rules to ignore for specific parts of your API.
+# See https://redoc.ly/docs/cli/ for more information.
+docs/openapi.json:
+  no-empty-servers:
+    - '#/openapi'
+  spec:
+    - >-
+      #/components/schemas/GenerateParameters/properties/best_of/exclusiveMinimum
+    - >-
+      #/components/schemas/GenerateParameters/properties/frequency_penalty/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/grammar/nullable'
+    - >-
+      #/components/schemas/GenerateParameters/properties/repetition_penalty/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/seed/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/temperature/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/top_k/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/top_n_tokens/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/top_p/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/typical_p/exclusiveMinimum
+    - '#/components/schemas/GenerateResponse/properties/details/nullable'
+    - '#/components/schemas/StreamResponse/properties/details/nullable'
+    - '#/components/schemas/ChatRequest/properties/response_format/nullable'
+    - '#/components/schemas/ChatRequest/properties/tool_choice/nullable'
+    - '#/components/schemas/ToolChoice/nullable'
+    - '#/components/schemas/ChatCompletionComplete/properties/logprobs/nullable'
+    - '#/components/schemas/ChatCompletionChoice/properties/logprobs/nullable'
+  no-invalid-media-type-examples:
+    - '#/paths/~1/post/responses/422/content/application~1json/example'
+    - '#/paths/~1/post/responses/424/content/application~1json/example'
+    - '#/paths/~1/post/responses/429/content/application~1json/example'
+    - '#/paths/~1/post/responses/500/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/422/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/424/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/429/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/500/content/application~1json/example'
+    - >-
+      #/paths/~1generate_stream/post/responses/422/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/424/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/429/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/500/content/text~1event-stream/example
+    - '#/paths/~1tokenize/post/responses/404/content/application~1json/example'
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/422/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/424/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/429/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/500/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/422/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/424/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/429/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/500/content/application~1json/example
+  operation-4xx-response:
+    - '#/paths/~1health/get/responses'
+    - '#/paths/~1info/get/responses'
+    - '#/paths/~1metrics/get/responses'
+  no-unused-components:
+    - '#/components/schemas/Completion'
+  security-defined:
+    - '#/paths/~1/post'
+    - '#/paths/~1generate/post'
+    - '#/paths/~1generate_stream/post'
+    - '#/paths/~1health/get'
+    - '#/paths/~1info/get'
+    - '#/paths/~1metrics/get'
+    - '#/paths/~1tokenize/post'
+    - '#/paths/~1v1~1chat~1completions/post'
+    - '#/paths/~1v1~1completions/post'
diff --git a/backends/v3/src/main.rs b/backends/v3/src/main.rs
index dfa90682..52e13cfa 100644
--- a/backends/v3/src/main.rs
+++ b/backends/v3/src/main.rs
@@ -113,19 +113,14 @@ async fn main() -> Result<(), RouterError> {
         max_client_batch_size,
     } = args;
 
-    let print_schema_command = match command {
-        Some(Commands::PrintSchema) => true,
-        None => {
-            // only init logging if we are not running the print schema command
-            text_generation_router::logging::init_logging(
-                otlp_endpoint,
-                otlp_service_name,
-                json_output,
-            );
-            false
-        }
+    // Print-schema mode: dump the OpenAPI document and exit before logging is initialised.
+    if matches!(command, Some(Commands::PrintSchema)) {
+        use utoipa::OpenApi;
+        let api_doc = text_generation_router::server::ApiDoc::openapi();
+        println!("{}", serde_json::to_string_pretty(&api_doc).unwrap());
+        std::process::exit(0);
     };
-    // Launch Tokio runtime
+    text_generation_router::logging::init_logging(otlp_endpoint, otlp_service_name, json_output);
 
     // Validate args
     if max_input_tokens >= max_total_tokens {
@@ -187,7 +182,6 @@ async fn main() -> Result<(), RouterError> {
         messages_api_enabled,
         disable_grammar_support,
         max_client_batch_size,
-        print_schema_command,
     )
     .await?;
     Ok(())
diff --git a/docs/openapi.json b/docs/openapi.json
index db163ca0..ed9b0b96 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1580,16 +1580,11 @@
         "type": "object",
         "required": [
           "model_id",
-          "model_dtype",
-          "model_device_type",
           "max_concurrent_requests",
           "max_best_of",
           "max_stop_sequences",
           "max_input_tokens",
           "max_total_tokens",
-          "waiting_served_ratio",
-          "max_batch_total_tokens",
-          "max_waiting_tokens",
           "validation_workers",
           "max_client_batch_size",
           "router",
@@ -1601,18 +1596,6 @@
             "example": "null",
             "nullable": true
           },
-          "max_batch_size": {
-            "type": "integer",
-            "example": "null",
-            "nullable": true,
-            "minimum": 0
-          },
-          "max_batch_total_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "example": "32000",
-            "minimum": 0
-          },
           "max_best_of": {
             "type": "integer",
             "example": "2",
@@ -1644,19 +1627,6 @@
             "example": "2048",
             "minimum": 0
           },
-          "max_waiting_tokens": {
-            "type": "integer",
-            "example": "20",
-            "minimum": 0
-          },
-          "model_device_type": {
-            "type": "string",
-            "example": "cuda"
-          },
-          "model_dtype": {
-            "type": "string",
-            "example": "torch.float16"
-          },
           "model_id": {
             "type": "string",
             "description": "Model info",
@@ -1690,11 +1660,6 @@
           "version": {
             "type": "string",
             "example": "0.5.0"
-          },
-          "waiting_served_ratio": {
-            "type": "number",
-            "format": "float",
-            "example": "1.2"
           }
         }
       },
diff --git a/router/src/server.rs b/router/src/server.rs
index 11e84359..c3b32c00 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1396,6 +1396,90 @@ async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
 #[derive(Clone, Debug)]
 pub(crate) struct ComputeType(String);
 
+// OpenAPI documentation (module-level so the schema can be rendered without calling `run`)
+#[derive(OpenApi)]
+#[openapi(
+paths(
+health,
+get_model_info,
+compat_generate,
+generate,
+generate_stream,
+chat_completions,
+completions,
+tokenize,
+metrics,
+),
+components(
+schemas(
+Info,
+CompatGenerateRequest,
+GenerateRequest,
+GrammarType,
+ChatRequest,
+Message,
+MessageContent,
+MessageChunk,
+Url,
+FunctionName,
+OutputMessage,
+TextMessage,
+ToolCallMessage,
+ToolCallDelta,
+ChatCompletionComplete,
+ChatCompletionChoice,
+ChatCompletionDelta,
+ChatCompletionChunk,
+ChatCompletionLogprob,
+ChatCompletionLogprobs,
+ChatCompletionTopLogprob,
+ChatCompletion,
+CompletionRequest,
+CompletionComplete,
+Chunk,
+Completion,
+CompletionFinal,
+Prompt,
+GenerateParameters,
+PrefillToken,
+Token,
+GenerateResponse,
+TokenizeResponse,
+SimpleToken,
+BestOfSequence,
+Details,
+FinishReason,
+StreamResponse,
+StreamDetails,
+ErrorResponse,
+Usage,
+DeltaToolCall,
+ToolType,
+Tool,
+ToolCall,
+Function,
+FunctionDefinition,
+ToolChoice,
+)
+),
+tags(
+(name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
+),
+info(
+title = "Text Generation Inference",
+license(
+name = "Apache 2.0",
+url = "https://www.apache.org/licenses/LICENSE-2.0"
+)
+)
+)]
+pub struct ApiDoc;
+
+/// Returns the [`ApiDoc`] marker value; the document itself is built by `ApiDoc::openapi()`.
+pub fn schema() -> ApiDoc {
+    ApiDoc
+}
+
 /// Serving method
 #[allow(clippy::too_many_arguments)]
 pub async fn run(
@@ -1420,95 +1504,7 @@ pub async fn run(
     messages_api_enabled: bool,
     grammar_support: bool,
     max_client_batch_size: usize,
-    print_schema_command: bool,
 ) -> Result<(), WebServerError> {
-    // OpenAPI documentation
-    #[derive(OpenApi)]
-    #[openapi(
-    paths(
-    health,
-    get_model_info,
-    compat_generate,
-    generate,
-    generate_stream,
-    chat_completions,
-    completions,
-    tokenize,
-    metrics,
-    ),
-    components(
-    schemas(
-    Info,
-    CompatGenerateRequest,
-    GenerateRequest,
-    GrammarType,
-    ChatRequest,
-    Message,
-    MessageContent,
-    MessageChunk,
-    Url,
-    FunctionName,
-    OutputMessage,
-    TextMessage,
-    ToolCallMessage,
-    ToolCallDelta,
-    ChatCompletionComplete,
-    ChatCompletionChoice,
-    ChatCompletionDelta,
-    ChatCompletionChunk,
-    ChatCompletionLogprob,
-    ChatCompletionLogprobs,
-    ChatCompletionTopLogprob,
-    ChatCompletion,
-    CompletionRequest,
-    CompletionComplete,
-    Chunk,
-    Completion,
-    CompletionFinal,
-    Prompt,
-    GenerateParameters,
-    PrefillToken,
-    Token,
-    GenerateResponse,
-    TokenizeResponse,
-    SimpleToken,
-    BestOfSequence,
-    Details,
-    FinishReason,
-    StreamResponse,
-    StreamDetails,
-    ErrorResponse,
-    GrammarType,
-    Usage,
-    DeltaToolCall,
-    ToolType,
-    Tool,
-    ToolCall,
-    Function,
-    FunctionDefinition,
-    ToolChoice,
-    )
-    ),
-    tags(
-    (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
-    ),
-    info(
-    title = "Text Generation Inference",
-    license(
-    name = "Apache 2.0",
-    url = "https://www.apache.org/licenses/LICENSE-2.0"
-    )
-    )
-    )]
-    struct ApiDoc;
-
-    // Create state
-    if print_schema_command {
-        let api_doc = ApiDoc::openapi();
-        let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
-        println!("{}", api_doc);
-        std::process::exit(0);
-    }
     // CORS allowed origins
     // map to go inside the option and then map to parse from String to HeaderValue
     // Finally, convert to AllowOrigin
diff --git a/update_doc.py b/update_doc.py
index bfa7e4e9..428d4452 100644
--- a/update_doc.py
+++ b/update_doc.py
@@ -167,22 +167,24 @@ def check_openapi(check: bool):
     else:
         os.rename(tmp_filename, filename)
         print("OpenAPI documentation updated.")
-    errors = subprocess.run(
+    p = subprocess.run(
         [
-            "swagger-cli",
+            "redocly",
             # allow for trailing whitespace since it's not significant
             # and the precommit hook will remove it
-            "validate",
+            "lint",
             filename,
         ],
         capture_output=True,
-    ).stderr.decode("utf-8")
+    )
+    errors = p.stderr.decode("utf-8")
     # The openapi specs fails on `exclusive_minimum` which is expected to be a boolean where
     # utoipa outputs a value instead: https://github.com/juhaku/utoipa/issues/969
-    if not errors.startswith("Swagger schema validation failed."):
-        print(errors)
+    # Always show the lint report; only a non-zero exit code means the spec is invalid.
+    print(errors)
+    if p.returncode != 0:
         raise Exception(
-            f"OpenAPI documentation is invalid, `swagger-cli validate` showed some error:\n {errors}"
+            f"OpenAPI documentation is invalid, `redocly lint {filename}` showed some error:\n {errors}"
         )
     return True