Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-24 00:12:08 +00:00).
Align the default value with server's (#111)
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in: parent 7342baa2eb, commit 3e28d7aa42.
@@ -119,8 +119,8 @@ impl Client {
 
         // get all possible prefill batch sizes
         let max_prefill_batch_size: u32 = max_prefill_tokens / max_input_length;
-        let prefill_bucket_size: u32 = read_env_var("PREFILL_BATCH_BUCKET_SIZE", 1);
-        let batch_sizes: Vec<u32> = (1..max_prefill_batch_size+1).step_by(prefill_bucket_size as usize).collect();
+        let prefill_bucket_size: u32 = read_env_var("PREFILL_BATCH_BUCKET_SIZE", 4);
+        let batch_sizes: Vec<u32> = (prefill_bucket_size..max_prefill_batch_size+1).step_by(prefill_bucket_size as usize).collect();
 
         // get all possible sequence lengths for prefill
         let seq_bucket_size: u32 = read_env_var("PAD_SEQUENCE_TO_MULTIPLE_OF", 128);
|
Loading…
Reference in New Issue
Block a user