This commit is contained in:
Mohit Sharma 2025-04-11 15:10:19 +00:00
parent d2f8caff2b
commit a7353c35e8

View File

@@ -769,6 +769,7 @@ class TransformersLlama4VlmCausalLM(TransformersFlashVlmCausalLM):
block_tables = self.cuda_graphs[max_bs]["block_tables"][: bs * max_bt] block_tables = self.cuda_graphs[max_bs]["block_tables"][: bs * max_bt]
else: else:
block_tables = self.cuda_graphs[max_bs]["block_tables"][:bs] block_tables = self.cuda_graphs[max_bs]["block_tables"][:bs]
block_tables_local = self.cuda_graphs[max_bs]["block_tables_local"][:bs]
slots = self.cuda_graphs[max_bs]["slots"][:bs] slots = self.cuda_graphs[max_bs]["slots"][:bs]
input_lengths_tensor = self.cuda_graphs[max_bs]["input_lengths"][:bs] input_lengths_tensor = self.cuda_graphs[max_bs]["input_lengths"][:bs]
cache_lengths_tensor = self.cuda_graphs[max_bs]["cache_lengths"][:bs] cache_lengths_tensor = self.cuda_graphs[max_bs]["cache_lengths"][:bs]