Mirror of https://github.com/huggingface/text-generation-inference.git
Synced 2025-09-11 20:34:54 +00:00

commit 7f1c22a72b
parent 124eea2d0e

update default
@@ -1013,6 +1013,7 @@
         "type": "integer",
         "format": "int32",
         "description": "The maximum number of tokens that can be generated in the chat completion.",
+        "default": "1024",
         "example": "32",
         "nullable": true,
         "minimum": 0
@@ -1329,7 +1330,8 @@
         "type": "integer",
         "format": "int32",
         "description": "The maximum number of tokens that can be generated in the chat completion.",
-        "default": "32",
+        "default": "1024",
+        "example": "32",
         "nullable": true,
         "minimum": 0
       },
@@ -1591,7 +1593,7 @@
         "type": "integer",
         "format": "int32",
         "description": "Maximum number of tokens to generate.",
-        "default": "100",
+        "default": "1024",
         "example": "20",
         "nullable": true,
         "minimum": 0
@@ -333,7 +333,7 @@ pub(crate) struct GenerateParameters {
 
     /// Maximum number of tokens to generate.
     #[serde(default)]
-    #[schema(nullable = true, default = "256", example = "20")]
+    #[schema(nullable = true, default = "1024", example = "20")]
     pub max_new_tokens: Option<u32>,
 
     /// Whether to prepend the prompt to the generated text
@@ -460,7 +460,7 @@ pub struct CompletionRequest {
 
     /// The maximum number of tokens that can be generated in the chat completion.
     #[serde(default)]
-    #[schema(default = "32")]
+    #[schema(default = "1024", example = "32")]
    pub max_tokens: Option<u32>,
 
     /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
@@ -838,7 +838,7 @@ pub(crate) struct ChatRequest {
 
     /// The maximum number of tokens that can be generated in the chat completion.
     #[serde(default)]
-    #[schema(example = "32")]
+    #[schema(default = "1024", example = "32")]
    pub max_tokens: Option<u32>,
 
     /// UNUSED
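The three openapi.json hunks above are presumably the regenerated output of these field annotations: `#[serde(default)]` on an `Option<u32>` makes a missing field deserialize to `None`, while the utoipa `#[schema(...)]` attribute only changes what the generated OpenAPI document advertises as the default; it does not apply 1024 at runtime. A minimal sketch of the pattern, assuming the struct derives utoipa's `ToSchema` as elsewhere in the router; the struct name is hypothetical and this is illustrative, not the actual TGI code:

use serde::Deserialize;
use utoipa::ToSchema;

// Illustrative sketch of the annotation pattern in the diff above.
// `#[serde(default)]` => a body that omits `max_tokens` deserializes to
//                        `None`; the server picks the real fallback later.
// `#[schema(...)]`    => only documents the default/example in openapi.json.
#[derive(Debug, Deserialize, ToSchema)]
pub struct ChatRequestSketch {
    /// The maximum number of tokens that can be generated in the chat completion.
    #[serde(default)]
    #[schema(default = "1024", example = "32")]
    pub max_tokens: Option<u32>,
}

fn main() {
    // A request body without `max_tokens` still parses; the field is `None`.
    let req: ChatRequestSketch = serde_json::from_str("{}").unwrap();
    assert_eq!(req.max_tokens, None);
}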
@@ -22,7 +22,7 @@ use tokio::sync::oneshot;
 use tracing::{instrument, Span};
 use {once_cell::sync::Lazy, regex::Regex};
 
-static DEFAULT_GENERATION_LENGTH: u32 = 10;
+static DEFAULT_GENERATION_LENGTH: u32 = 1024;
 
 /// Validation
 #[derive(Debug, Clone)]
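This constant in the validation module is what actually fills the gap at runtime when a request leaves the token budget unset; the schema defaults above are documentation only. A hedged sketch of that fallback, assuming (as the name suggests) the constant caps generation when `max_new_tokens` is `None`; the clamp against the remaining context budget and the helper's name are assumptions, not quotes of the validation code:

static DEFAULT_GENERATION_LENGTH: u32 = 1024;

// Hypothetical helper illustrating how the constant is likely consumed:
// if the client did not set `max_new_tokens`, fall back to the default,
// clamped so input + output never exceed the model's context budget.
fn resolve_max_new_tokens(
    requested: Option<u32>,
    max_total_tokens: u32,
    input_length: u32,
) -> u32 {
    requested.unwrap_or_else(|| {
        DEFAULT_GENERATION_LENGTH.min(max_total_tokens.saturating_sub(input_length))
    })
}

fn main() {
    // With a 4096-token budget and a 100-token prompt, an unset request
    // now defaults to 1024 new tokens instead of 10.
    assert_eq!(resolve_max_new_tokens(None, 4096, 100), 1024);
    assert_eq!(resolve_max_new_tokens(Some(20), 4096, 100), 20);
}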