Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-09 03:14:53 +00:00
Make style 2
commit 4ee34f64c6
parent afbebe6990
@@ -232,7 +232,7 @@ class VlmCausalLMBatch(CausalLMBatch):
         self.prefilling = prefilling

     @property
-    def token_idx(self):
+    def token_idx(self):  # noqa: F811
         if self.prefilling:
             # no right padding for prefill
             token_idx_scalar = self.attention_mask.shape[-1] - 1
@@ -1534,8 +1534,8 @@ class VlmCausalLM(Model):

         except Exception:
             raise RuntimeError(
-                f"Not enough memory to handle following prefill and decode warmup."
-                f"You need to decrease `--max-batch-prefill-tokens`"
+                "Not enough memory to handle following prefill and decode warmup."
+                "You need to decrease `--max-batch-prefill-tokens`"
             )

         mem_stats = get_hpu_memory_stats(self.device)