mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Changing the waiting_served_ratio default (stack more aggressively by
default).
This commit is contained in:
parent
a8fd4236eb
commit
80c23bdd38
@@ -251,7 +251,7 @@ struct Args {
|
|||||||
///
|
///
|
||||||
/// This setting is only applied if there is room in the batch
|
/// This setting is only applied if there is room in the batch
|
||||||
/// as defined by `max_batch_total_tokens`.
|
/// as defined by `max_batch_total_tokens`.
|
||||||
#[clap(default_value = "1.2", long, env)]
|
#[clap(default_value = "0.3", long, env)]
|
||||||
waiting_served_ratio: f32,
|
waiting_served_ratio: f32,
|
||||||
|
|
||||||
/// Limits the number of tokens for the prefill operation.
|
/// Limits the number of tokens for the prefill operation.
|
||||||
|
Loading…
Reference in New Issue
Block a user