mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 22:32:07 +00:00
fix bt
This commit is contained in:
parent
d2f8caff2b
commit
a7353c35e8
@ -769,6 +769,7 @@ class TransformersLlama4VlmCausalLM(TransformersFlashVlmCausalLM):
|
|||||||
block_tables = self.cuda_graphs[max_bs]["block_tables"][: bs * max_bt]
|
block_tables = self.cuda_graphs[max_bs]["block_tables"][: bs * max_bt]
|
||||||
else:
|
else:
|
||||||
block_tables = self.cuda_graphs[max_bs]["block_tables"][:bs]
|
block_tables = self.cuda_graphs[max_bs]["block_tables"][:bs]
|
||||||
|
block_tables_local = self.cuda_graphs[max_bs]["block_tables_local"][:bs]
|
||||||
slots = self.cuda_graphs[max_bs]["slots"][:bs]
|
slots = self.cuda_graphs[max_bs]["slots"][:bs]
|
||||||
input_lengths_tensor = self.cuda_graphs[max_bs]["input_lengths"][:bs]
|
input_lengths_tensor = self.cuda_graphs[max_bs]["input_lengths"][:bs]
|
||||||
cache_lengths_tensor = self.cuda_graphs[max_bs]["cache_lengths"][:bs]
|
cache_lengths_tensor = self.cuda_graphs[max_bs]["cache_lengths"][:bs]
|
||||||
|
Loading…
Reference in New Issue
Block a user