diff --git a/docs/openapi.json b/docs/openapi.json
index 44691e4b..f552ee08 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1013,6 +1013,7 @@
             "type": "integer",
             "format": "int32",
             "description": "The maximum number of tokens that can be generated in the chat completion.",
+            "default": "1024",
             "example": "32",
             "nullable": true,
             "minimum": 0
@@ -1329,7 +1330,8 @@
             "type": "integer",
             "format": "int32",
             "description": "The maximum number of tokens that can be generated in the chat completion.",
-            "default": "32",
+            "default": "1024",
+            "example": "32",
             "nullable": true,
             "minimum": 0
           },
@@ -1591,7 +1593,7 @@
             "type": "integer",
             "format": "int32",
             "description": "Maximum number of tokens to generate.",
-            "default": "100",
+            "default": "1024",
             "example": "20",
             "nullable": true,
             "minimum": 0
diff --git a/router/src/lib.rs b/router/src/lib.rs
index bb040397..40076564 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -333,7 +333,7 @@ pub(crate) struct GenerateParameters {
 
     /// Maximum number of tokens to generate.
     #[serde(default)]
-    #[schema(nullable = true, default = "256", example = "20")]
+    #[schema(nullable = true, default = "1024", example = "20")]
     pub max_new_tokens: Option<u32>,
 
     /// Whether to prepend the prompt to the generated text
@@ -460,7 +460,7 @@ pub struct CompletionRequest {
 
     /// The maximum number of tokens that can be generated in the chat completion.
     #[serde(default)]
-    #[schema(default = "32")]
+    #[schema(default = "1024", example = "32")]
     pub max_tokens: Option<u32>,
 
     /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
@@ -838,7 +838,7 @@ pub(crate) struct ChatRequest {
 
     /// The maximum number of tokens that can be generated in the chat completion.
     #[serde(default)]
-    #[schema(example = "32")]
+    #[schema(default = "1024", example = "32")]
     pub max_tokens: Option<u32>,
 
     /// UNUSED
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 14eaa186..8137ac58 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -22,7 +22,7 @@ use tokio::sync::oneshot;
 use tracing::{instrument, Span};
 use {once_cell::sync::Lazy, regex::Regex};
 
-static DEFAULT_GENERATION_LENGTH: u32 = 10;
+const DEFAULT_GENERATION_LENGTH: u32 = 1024; // keep in sync with the "1024" schema defaults in lib.rs
 
 /// Validation
 #[derive(Debug, Clone)]