mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 11:24:53 +00:00
remove useless max_max_new_tokens
This commit is contained in:
parent
d6fc264a99
commit
32d7e5e20f
@ -20,14 +20,12 @@ use tracing_subscriber::{EnvFilter, Layer};
|
|||||||
struct Args {
|
struct Args {
|
||||||
#[clap(default_value = "128", long, env)]
|
#[clap(default_value = "128", long, env)]
|
||||||
max_concurrent_requests: usize,
|
max_concurrent_requests: usize,
|
||||||
#[clap(default_value = "512", long, env)]
|
|
||||||
max_max_new_tokens: u32,
|
|
||||||
#[clap(default_value = "4", long, env)]
|
#[clap(default_value = "4", long, env)]
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
#[clap(default_value = "1512", long, env)]
|
|
||||||
max_total_tokens: usize,
|
|
||||||
#[clap(default_value = "1000", long, env)]
|
#[clap(default_value = "1000", long, env)]
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
|
#[clap(default_value = "1512", long, env)]
|
||||||
|
max_total_tokens: usize,
|
||||||
#[clap(default_value = "32", long, env)]
|
#[clap(default_value = "32", long, env)]
|
||||||
max_batch_size: usize,
|
max_batch_size: usize,
|
||||||
#[clap(default_value = "20", long, env)]
|
#[clap(default_value = "20", long, env)]
|
||||||
@ -52,7 +50,6 @@ fn main() -> Result<(), std::io::Error> {
|
|||||||
// Pattern match configuration
|
// Pattern match configuration
|
||||||
let Args {
|
let Args {
|
||||||
max_concurrent_requests,
|
max_concurrent_requests,
|
||||||
max_max_new_tokens,
|
|
||||||
max_stop_sequences,
|
max_stop_sequences,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
@ -101,7 +98,6 @@ fn main() -> Result<(), std::io::Error> {
|
|||||||
// Run server
|
// Run server
|
||||||
server::run(
|
server::run(
|
||||||
max_concurrent_requests,
|
max_concurrent_requests,
|
||||||
max_max_new_tokens,
|
|
||||||
max_stop_sequences,
|
max_stop_sequences,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
|
@ -291,7 +291,6 @@ async fn generate_stream(
|
|||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub async fn run(
|
pub async fn run(
|
||||||
max_concurrent_requests: usize,
|
max_concurrent_requests: usize,
|
||||||
max_max_new_tokens: u32,
|
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
@ -339,7 +338,6 @@ pub async fn run(
|
|||||||
let validation = Validation::new(
|
let validation = Validation::new(
|
||||||
validation_workers,
|
validation_workers,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
max_max_new_tokens,
|
|
||||||
max_stop_sequences,
|
max_stop_sequences,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
|
@ -20,7 +20,6 @@ impl Validation {
|
|||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
workers: usize,
|
workers: usize,
|
||||||
tokenizer: Tokenizer,
|
tokenizer: Tokenizer,
|
||||||
max_max_new_tokens: u32,
|
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
@ -32,7 +31,6 @@ impl Validation {
|
|||||||
tokio::spawn(validation_task(
|
tokio::spawn(validation_task(
|
||||||
workers,
|
workers,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
max_max_new_tokens,
|
|
||||||
max_stop_sequences,
|
max_stop_sequences,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
@ -69,7 +67,6 @@ impl Validation {
|
|||||||
async fn validation_task(
|
async fn validation_task(
|
||||||
workers: usize,
|
workers: usize,
|
||||||
tokenizer: Tokenizer,
|
tokenizer: Tokenizer,
|
||||||
max_max_new_tokens: u32,
|
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
@ -88,7 +85,6 @@ async fn validation_task(
|
|||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
validation_worker(
|
validation_worker(
|
||||||
tokenizer_clone,
|
tokenizer_clone,
|
||||||
max_max_new_tokens,
|
|
||||||
max_stop_sequences,
|
max_stop_sequences,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
@ -113,7 +109,6 @@ async fn validation_task(
|
|||||||
/// the tokenizer
|
/// the tokenizer
|
||||||
fn validation_worker(
|
fn validation_worker(
|
||||||
tokenizer: Tokenizer,
|
tokenizer: Tokenizer,
|
||||||
max_max_new_tokens: u32,
|
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
@ -130,7 +125,6 @@ fn validation_worker(
|
|||||||
validate(
|
validate(
|
||||||
request,
|
request,
|
||||||
&tokenizer,
|
&tokenizer,
|
||||||
max_max_new_tokens,
|
|
||||||
max_stop_sequences,
|
max_stop_sequences,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
@ -149,7 +143,6 @@ fn validation_worker(
|
|||||||
fn validate(
|
fn validate(
|
||||||
request: GenerateRequest,
|
request: GenerateRequest,
|
||||||
tokenizer: &Tokenizer,
|
tokenizer: &Tokenizer,
|
||||||
max_max_new_tokens: u32,
|
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
@ -194,8 +187,8 @@ fn validate(
|
|||||||
}
|
}
|
||||||
}?;
|
}?;
|
||||||
|
|
||||||
if max_new_tokens == 0 || max_new_tokens > max_max_new_tokens {
|
if max_new_tokens == 0 {
|
||||||
return Err(ValidationError::MaxNewTokens(max_max_new_tokens));
|
return Err(ValidationError::MaxNewTokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
if stop_sequences.len() > max_stop_sequences {
|
if stop_sequences.len() > max_stop_sequences {
|
||||||
@ -226,7 +219,8 @@ fn validate(
|
|||||||
} else if total_tokens > max_total_tokens {
|
} else if total_tokens > max_total_tokens {
|
||||||
Err(ValidationError::MaxTotalTokens(
|
Err(ValidationError::MaxTotalTokens(
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
total_tokens,
|
input_length,
|
||||||
|
max_new_tokens,
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
// Return ValidGenerateRequest
|
// Return ValidGenerateRequest
|
||||||
@ -279,10 +273,10 @@ pub enum ValidationError {
|
|||||||
TopP,
|
TopP,
|
||||||
#[error("top_k must be strictly positive")]
|
#[error("top_k must be strictly positive")]
|
||||||
TopK,
|
TopK,
|
||||||
#[error("max_new_tokens must be strictly positive and <= {0}")]
|
#[error("max_new_tokens must be strictly positive")]
|
||||||
MaxNewTokens(u32),
|
MaxNewTokens,
|
||||||
#[error("input tokens + max_new_tokens must be <= {0}. Given {1}")]
|
#[error("input tokens + max_new_tokens must be <= {0}. Given: {1} input tokens and {2} max_new_tokens")]
|
||||||
MaxTotalTokens(usize, usize),
|
MaxTotalTokens(usize, usize, u32),
|
||||||
#[error("inputs must have less than {0} tokens. Given: {1}")]
|
#[error("inputs must have less than {0} tokens. Given: {1}")]
|
||||||
InputLength(usize, usize),
|
InputLength(usize, usize),
|
||||||
#[error("inputs cannot be empty")]
|
#[error("inputs cannot be empty")]
|
||||||
|
Loading…
Reference in New Issue
Block a user