From 3e28d7aa42aae3cfb148b8d09839d5837a5e3866 Mon Sep 17 00:00:00 2001 From: yuanwu2017 Date: Mon, 1 Apr 2024 18:44:20 +0800 Subject: [PATCH] Align the default value with server's (#111) Signed-off-by: yuanwu --- router/client/src/client.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/client/src/client.rs b/router/client/src/client.rs index 8c3f6da4..9522503d 100644 --- a/router/client/src/client.rs +++ b/router/client/src/client.rs @@ -119,8 +119,8 @@ impl Client { // get all possible prefill batch sizes let max_prefill_batch_size: u32 = max_prefill_tokens / max_input_length; - let prefill_bucket_size: u32 = read_env_var("PREFILL_BATCH_BUCKET_SIZE", 1); - let batch_sizes: Vec<u32> = (1..max_prefill_batch_size+1).step_by(prefill_bucket_size as usize).collect(); + let prefill_bucket_size: u32 = read_env_var("PREFILL_BATCH_BUCKET_SIZE", 4); + let batch_sizes: Vec<u32> = (prefill_bucket_size..max_prefill_batch_size+1).step_by(prefill_bucket_size as usize).collect(); // get all possible sequence lengths for prefill let seq_bucket_size: u32 = read_env_var("PAD_SEQUENCE_TO_MULTIPLE_OF", 128);