Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-21 23:12:07 +00:00)
Fix the warmup issue of prefill batch_size (#268)
Signed-off-by: yuanwu <yuan.wu@intel.com>
Parent: 63c64bb307
Commit: fe7594e369
@@ -1197,7 +1197,7 @@ class CausalLM(Model):
         # Warmup prefill batch_size
         max_input_length = request.max_input_length
-        prefill_batch_size_list = [batch for batch in range(BATCH_BUCKET_SIZE, max_prefill_batch_size, BATCH_BUCKET_SIZE)]
+        prefill_batch_size_list = [batch for batch in range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size, PREFILL_BATCH_BUCKET_SIZE)]
         prefill_batch_size_list.append(max_prefill_batch_size)
         prefill_seqlen_list = [seq for seq in range(PAD_SEQUENCE_TO_MULTIPLE_OF, max_input_length, PAD_SEQUENCE_TO_MULTIPLE_OF)]
         prefill_seqlen_list.append(max_input_length)
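The fix swaps BATCH_BUCKET_SIZE for PREFILL_BATCH_BUCKET_SIZE when building the list of prefill batch sizes to warm up, so the batch-size buckets warmed at startup match the bucketing actually applied to prefill batches at serving time rather than the decode-phase granularity. Below is a minimal standalone sketch of the bucket-list construction after the fix. The constant values are illustrative assumptions (in the real server they come from configuration), not values taken from this commit:

    # Sketch of the warmup bucket-list construction (illustrative values only).
    PREFILL_BATCH_BUCKET_SIZE = 2       # assumed example value
    PAD_SEQUENCE_TO_MULTIPLE_OF = 128   # assumed example value

    max_prefill_batch_size = 8          # stands in for the server-derived maximum
    max_input_length = 512              # stands in for request.max_input_length

    # Batch sizes to warm for prefill: every multiple of the prefill bucket
    # size below the maximum, plus the maximum itself.
    prefill_batch_size_list = list(
        range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size, PREFILL_BATCH_BUCKET_SIZE)
    )
    prefill_batch_size_list.append(max_prefill_batch_size)

    # Sequence lengths to warm: every multiple of the padding granularity
    # below the maximum input length, plus the maximum itself.
    prefill_seqlen_list = list(
        range(PAD_SEQUENCE_TO_MULTIPLE_OF, max_input_length, PAD_SEQUENCE_TO_MULTIPLE_OF)
    )
    prefill_seqlen_list.append(max_input_length)

    print(prefill_batch_size_list)  # [2, 4, 6, 8]
    print(prefill_seqlen_list)      # [128, 256, 384, 512]

Both appends are needed because range() excludes its stop value; each intermediate bucket and the exact maximum shape then gets a warmup pass.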