From 80c23bdd3879ecf41e2f74011141a590e90132d2 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Fri, 26 Apr 2024 19:16:39 +0200
Subject: [PATCH] Changing the waiting_served_ratio default (stack more aggressively by default).

---
 launcher/src/main.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index f264e000..28226fb4 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -251,7 +251,7 @@ struct Args {
     ///
     /// This setting is only applied if there is room in the batch
     /// as defined by `max_batch_total_tokens`.
-    #[clap(default_value = "1.2", long, env)]
+    #[clap(default_value = "0.3", long, env)]
     waiting_served_ratio: f32,
 
     /// Limits the number of tokens for the prefill operation.