fix bt

2025-04-20 22:32:07 +00:00 · 2025-04-11 15:10:19 +00:00 · 2025-04-11 15:10:19 +00:00 · a7353c35e8
commit a7353c35e8
parent d2f8caff2b
1 changed files with 1 additions and 0 deletions
--- a/server/text_generation_server/models/transformers_flash_vlm.py
+++ b/server/text_generation_server/models/transformers_flash_vlm.py
@@ -769,6 +769,7 @@ class TransformersLlama4VlmCausalLM(TransformersFlashVlmCausalLM):
                block_tables = self.cuda_graphs[max_bs]["block_tables"][: bs * max_bt]
            else:
                block_tables = self.cuda_graphs[max_bs]["block_tables"][:bs]
                block_tables_local = self.cuda_graphs[max_bs]["block_tables_local"][:bs]
            slots = self.cuda_graphs[max_bs]["slots"][:bs]
            input_lengths_tensor = self.cuda_graphs[max_bs]["input_lengths"][:bs]
            cache_lengths_tensor = self.cuda_graphs[max_bs]["cache_lengths"][:bs]