Update default generation length (max_new_tokens / max_tokens) to 1024

This commit is contained in:
OlivierDehaene 2024-12-05 18:22:54 +01:00
parent 124eea2d0e
commit 7f1c22a72b
No known key found for this signature in database
GPG Key ID: BB104D67809DA93C
3 changed files with 8 additions and 6 deletions

View File

@ -1013,6 +1013,7 @@
"type": "integer", "type": "integer",
"format": "int32", "format": "int32",
"description": "The maximum number of tokens that can be generated in the chat completion.", "description": "The maximum number of tokens that can be generated in the chat completion.",
"default": "1024",
"example": "32", "example": "32",
"nullable": true, "nullable": true,
"minimum": 0 "minimum": 0
@ -1329,7 +1330,8 @@
"type": "integer", "type": "integer",
"format": "int32", "format": "int32",
"description": "The maximum number of tokens that can be generated in the chat completion.", "description": "The maximum number of tokens that can be generated in the chat completion.",
"default": "32", "default": "1024",
"example": "32",
"nullable": true, "nullable": true,
"minimum": 0 "minimum": 0
}, },
@ -1591,7 +1593,7 @@
"type": "integer", "type": "integer",
"format": "int32", "format": "int32",
"description": "Maximum number of tokens to generate.", "description": "Maximum number of tokens to generate.",
"default": "100", "default": "1024",
"example": "20", "example": "20",
"nullable": true, "nullable": true,
"minimum": 0 "minimum": 0

View File

@ -333,7 +333,7 @@ pub(crate) struct GenerateParameters {
/// Maximum number of tokens to generate. /// Maximum number of tokens to generate.
#[serde(default)] #[serde(default)]
#[schema(nullable = true, default = "256", example = "20")] #[schema(nullable = true, default = "1024", example = "20")]
pub max_new_tokens: Option<u32>, pub max_new_tokens: Option<u32>,
/// Whether to prepend the prompt to the generated text /// Whether to prepend the prompt to the generated text
@ -460,7 +460,7 @@ pub struct CompletionRequest {
/// The maximum number of tokens that can be generated in the chat completion. /// The maximum number of tokens that can be generated in the chat completion.
#[serde(default)] #[serde(default)]
#[schema(default = "32")] #[schema(default = "1024", example = "32")]
pub max_tokens: Option<u32>, pub max_tokens: Option<u32>,
/// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
@ -838,7 +838,7 @@ pub(crate) struct ChatRequest {
/// The maximum number of tokens that can be generated in the chat completion. /// The maximum number of tokens that can be generated in the chat completion.
#[serde(default)] #[serde(default)]
#[schema(example = "32")] #[schema(default = "1024", example = "32")]
pub max_tokens: Option<u32>, pub max_tokens: Option<u32>,
/// UNUSED /// UNUSED

View File

@ -22,7 +22,7 @@ use tokio::sync::oneshot;
use tracing::{instrument, Span}; use tracing::{instrument, Span};
use {once_cell::sync::Lazy, regex::Regex}; use {once_cell::sync::Lazy, regex::Regex};
static DEFAULT_GENERATION_LENGTH: u32 = 10; static DEFAULT_GENERATION_LENGTH: u32 = 1024;
/// Validation /// Validation
#[derive(Debug, Clone)] #[derive(Debug, Clone)]