Fix the prefill warmup issue: append warmup batch sizes to PREFILL_WARMUP_BATCH_SIZE_LIST (they were wrongly appended to DECODE_WARMUP_BATCH_SIZE_LIST), and extend the range stop to max_prefill_batch_size + 1 so the maximum prefill batch size is actually warmed up (range's stop is exclusive).

Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
yuanwu 2024-11-01 05:08:18 +02:00
parent 4c9856f9e5
commit fcf2e3a338

View File

@ -1265,8 +1265,8 @@ class CausalLM(Model):
#Prefill and decode warmup
try:
for batch_size in range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size, PREFILL_BATCH_BUCKET_SIZE):
DECODE_WARMUP_BATCH_SIZE_LIST.append(batch_size)
for batch_size in range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size + 1, PREFILL_BATCH_BUCKET_SIZE):
PREFILL_WARMUP_BATCH_SIZE_LIST.append(batch_size)
for seq_len in PREFILL_WARMUP_SEQLEN_LIST :
batch = self.generate_warmup_batch(request, seq_len - 1, batch_size)
_, prefill_batch, _ = self.generate_token([batch], is_warmup)