Fix prefill batch_size warmup to step by PREFILL_BATCH_BUCKET_SIZE instead of BATCH_BUCKET_SIZE (#268)

Signed-off-by: yuanwu <yuan.wu@intel.com>
2025-07-02 22:10:17 +00:00 · 2025-01-24 00:26:17 +08:00 · 2025-01-24 00:26:17 +08:00 · fe7594e369
commit fe7594e369
parent 63c64bb307
1 changed file with 1 addition and 1 deletion
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -1197,7 +1197,7 @@ class CausalLM(Model):

        # Warmup prefill batch_size
        max_input_length =  request.max_input_length
-        prefill_batch_size_list = [batch for batch in range(BATCH_BUCKET_SIZE, max_prefill_batch_size, BATCH_BUCKET_SIZE)]
+        prefill_batch_size_list = [batch for batch in range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size, PREFILL_BATCH_BUCKET_SIZE)]
        prefill_batch_size_list.append(max_prefill_batch_size)
        prefill_seqlen_list = [seq for seq in range(PAD_SEQUENCE_TO_MULTIPLE_OF, max_input_length, PAD_SEQUENCE_TO_MULTIPLE_OF)]
        prefill_seqlen_list.append(max_input_length)