Bypass HPU graphs during prefill

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi A 2025-04-15 00:27:07 -07:00
parent 6b21985c95
commit 5ec7f15d0c
2 changed files with 2 additions and 2 deletions

View File

@@ -1785,7 +1785,7 @@ class FlashCausalLM(Model):
kwargs = {}
if htorch.utils.internal.is_lazy():
-kwargs["bypass_hpu_graphs"] = False
+kwargs["bypass_hpu_graphs"] = batch.prefilling
logits, speculative_logits = self.model.forward(
input_ids=input_ids,

View File

@@ -455,7 +455,7 @@ class FlashMllamaCausalLM(FlashVlmCausalLM):
kwargs = {}
if htorch.utils.internal.is_lazy():
-kwargs["bypass_hpu_graphs"] = False
+kwargs["bypass_hpu_graphs"] = batch.prefilling
if batch.prefill_cache_indices is not None:
slots_pad = torch.zeros_like(input_ids)
slots_pad[batch.prefill_cache_indices] = slots