diff --git a/docs/openapi.json b/docs/openapi.json
index 44691e4b..f552ee08 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1013,6 +1013,7 @@
 "type": "integer",
 "format": "int32",
 "description": "The maximum number of tokens that can be generated in the chat completion.",
+ "default": "1024",
 "example": "32",
 "nullable": true,
 "minimum": 0
@@ -1329,7 +1330,8 @@
 "type": "integer",
 "format": "int32",
 "description": "The maximum number of tokens that can be generated in the chat completion.",
- "default": "32",
+ "default": "1024",
+ "example": "32",
 "nullable": true,
 "minimum": 0
 },
@@ -1591,7 +1593,7 @@
 "type": "integer",
 "format": "int32",
 "description": "Maximum number of tokens to generate.",
- "default": "100",
+ "default": "1024",
 "example": "20",
 "nullable": true,
 "minimum": 0
diff --git a/router/src/lib.rs b/router/src/lib.rs
index bb040397..40076564 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -333,7 +333,7 @@ pub(crate) struct GenerateParameters {
 
 /// Maximum number of tokens to generate.
 #[serde(default)]
- #[schema(nullable = true, default = "256", example = "20")]
+ #[schema(nullable = true, default = "1024", example = "20")]
 pub max_new_tokens: Option,
 
 /// Whether to prepend the prompt to the generated text
@@ -460,7 +460,7 @@ pub struct CompletionRequest {
 
 /// The maximum number of tokens that can be generated in the chat completion.
 #[serde(default)]
- #[schema(default = "32")]
+ #[schema(default = "1024", example = "32")]
 pub max_tokens: Option,
 
 /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
@@ -838,7 +838,7 @@ pub(crate) struct ChatRequest {
 
 /// The maximum number of tokens that can be generated in the chat completion.
 #[serde(default)]
- #[schema(example = "32")]
+ #[schema(default = "1024", example = "32")]
 pub max_tokens: Option,
 
 /// UNUSED
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 14eaa186..8137ac58 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -22,7 +22,7 @@ use tokio::sync::oneshot;
 use tracing::{instrument, Span};
 use {once_cell::sync::Lazy, regex::Regex};
 
-static DEFAULT_GENERATION_LENGTH: u32 = 10;
+static DEFAULT_GENERATION_LENGTH: u32 = 1024;
 
 /// Validation
 #[derive(Debug, Clone)]