From 5c3efbc7632cc4ce7365272bb031d2d705661593 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 23 Oct 2024 15:23:39 +0800 Subject: [PATCH] Attempt #2 --- server/text_generation_server/models/flash_causal_lm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 6eb914f8..37d74279 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -1418,7 +1418,7 @@ class FlashCausalLM(Model): ) max_total_tokens = available_blocks else: - max_total_tokens = batch.num_blocks + max_total_tokens = len(batch.input_ids) max_input_tokens = ( batch.num_blocks - 1 if max_input_tokens is None