Changing the waiting_served_ratio default (stack more aggressively by default).
This commit is contained in:
Nicolas Patry 2024-04-26 19:16:39 +02:00
parent a8fd4236eb
commit 80c23bdd38

View File

@@ -251,7 +251,7 @@ struct Args {
///
/// This setting is only applied if there is room in the batch
/// as defined by `max_batch_total_tokens`.
-#[clap(default_value = "1.2", long, env)]
+#[clap(default_value = "0.3", long, env)]
waiting_served_ratio: f32,
/// Limits the number of tokens for the prefill operation.