Align the default value with server's (#111)

Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
yuanwu2017 2024-04-01 18:44:20 +08:00 committed by GitHub
parent 7342baa2eb
commit 3e28d7aa42
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -119,8 +119,8 @@ impl Client {
// get all possible prefill batch sizes
let max_prefill_batch_size: u32 = max_prefill_tokens / max_input_length;
-let prefill_bucket_size: u32 = read_env_var("PREFILL_BATCH_BUCKET_SIZE", 1);
-let batch_sizes: Vec<u32> = (1..max_prefill_batch_size+1).step_by(prefill_bucket_size as usize).collect();
+let prefill_bucket_size: u32 = read_env_var("PREFILL_BATCH_BUCKET_SIZE", 4);
+let batch_sizes: Vec<u32> = (prefill_bucket_size..max_prefill_batch_size+1).step_by(prefill_bucket_size as usize).collect();
// get all possible sequence lengths for prefill
let seq_bucket_size: u32 = read_env_var("PAD_SEQUENCE_TO_MULTIPLE_OF", 128);