From 82a6cb82e1ff7148e7a24dd09ca725d9c2540582 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 23 Oct 2024 17:26:18 +0800 Subject: [PATCH] Fix max_total_tokens fallback: sum per-request input lengths instead of counting requests, and base the max_input_tokens default on it. --- server/text_generation_server/models/flash_causal_lm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 37d74279..ca45020c 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -1418,9 +1418,9 @@ class FlashCausalLM(Model): ) max_total_tokens = available_blocks else: - max_total_tokens = len(batch.input_ids) + max_total_tokens = sum(len(input_ids) for input_ids in batch.input_ids) max_input_tokens = ( - batch.num_blocks - 1 + max_total_tokens - 1 if max_input_tokens is None else max_input_tokens )