From fe7594e3696e08e55669bbf0ce0bd3b38c2b444c Mon Sep 17 00:00:00 2001
From: Yuan Wu
Date: Fri, 24 Jan 2025 00:26:17 +0800
Subject: [PATCH] Fix the warmup issue of prefill batch_size (#268)

Signed-off-by: yuanwu
---
 server/text_generation_server/models/causal_lm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
index f1c9c0bf8..44662388f 100644
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -1197,7 +1197,7 @@ class CausalLM(Model):
 
         # Warmup prefill batch_size
         max_input_length = request.max_input_length
-        prefill_batch_size_list = [batch for batch in range(BATCH_BUCKET_SIZE, max_prefill_batch_size, BATCH_BUCKET_SIZE)]
+        prefill_batch_size_list = [batch for batch in range(PREFILL_BATCH_BUCKET_SIZE, max_prefill_batch_size, PREFILL_BATCH_BUCKET_SIZE)]
         prefill_batch_size_list.append(max_prefill_batch_size)
         prefill_seqlen_list = [seq for seq in range(PAD_SEQUENCE_TO_MULTIPLE_OF, max_input_length, PAD_SEQUENCE_TO_MULTIPLE_OF)]
         prefill_seqlen_list.append(max_input_length)