Make style 2

This commit is contained in:
regisss 2025-05-10 17:04:32 +00:00
parent afbebe6990
commit 4ee34f64c6

View File

@@ -232,7 +232,7 @@ class VlmCausalLMBatch(CausalLMBatch):
self.prefilling = prefilling self.prefilling = prefilling
@property @property
def token_idx(self): def token_idx(self): # noqa: F811
if self.prefilling: if self.prefilling:
# no right padding for prefill # no right padding for prefill
token_idx_scalar = self.attention_mask.shape[-1] - 1 token_idx_scalar = self.attention_mask.shape[-1] - 1
@@ -1534,8 +1534,8 @@ class VlmCausalLM(Model):
except Exception: except Exception:
raise RuntimeError( raise RuntimeError(
f"Not enough memory to handle following prefill and decode warmup." "Not enough memory to handle following prefill and decode warmup."
f"You need to decrease `--max-batch-prefill-tokens`" "You need to decrease `--max-batch-prefill-tokens`"
) )
mem_stats = get_hpu_memory_stats(self.device) mem_stats = get_hpu_memory_stats(self.device)