Changing the waiting_served_ratio default (stack more aggressively by default).
2025-09-11 20:34:54 +00:00 · 2024-04-26 19:16:39 +02:00 · 2024-04-26 19:16:39 +02:00 · 80c23bdd38
commit 80c23bdd38
parent a8fd4236eb
1 changed file with 1 addition and 1 deletion
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -251,7 +251,7 @@ struct Args {
    ///
    /// This setting is only applied if there is room in the batch
    /// as defined by `max_batch_total_tokens`.
-    #[clap(default_value = "1.2", long, env)]
+    #[clap(default_value = "0.3", long, env)]
    waiting_served_ratio: f32,
    /// Limits the number of tokens for the prefill operation.