mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 00:12:08 +00:00
Fix the prefill warmup issue
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
4c9856f9e5
commit
fcf2e3a338
@@ -1265,8 +1265,8 @@ class CausalLM(Model):
|
||||
|
||||
#Prefill and decode warmup
|
||||
try:
|
||||
for batch_size in range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size, PREFILL_BATCH_BUCKET_SIZE):
|
||||
DECODE_WARMUP_BATCH_SIZE_LIST.append(batch_size)
|
||||
for batch_size in range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size + 1, PREFILL_BATCH_BUCKET_SIZE):
|
||||
PREFILL_WARMUP_BATCH_SIZE_LIST.append(batch_size)
|
||||
for seq_len in PREFILL_WARMUP_SEQLEN_LIST :
|
||||
batch = self.generate_warmup_batch(request, seq_len - 1, batch_size)
|
||||
_, prefill_batch, _ = self.generate_token([batch], is_warmup)
|
||||
|
Loading…
Reference in New Issue
Block a user