Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-09 03:14:53 +00:00)
Make style 2
commit 4ee34f64c6
parent afbebe6990
@@ -232,7 +232,7 @@ class VlmCausalLMBatch(CausalLMBatch):
         self.prefilling = prefilling
 
     @property
-    def token_idx(self):
+    def token_idx(self):  # noqa: F811
         if self.prefilling:
             # no right padding for prefill
             token_idx_scalar = self.attention_mask.shape[-1] - 1
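
The noqa directive added in this hunk suppresses flake8's F811 check ("redefinition of unused name"), which fires when the same name is bound twice in one scope; here, token_idx is evidently rebound as a property further down the class body. A minimal sketch of the pattern with a hypothetical class (not the actual VlmCausalLMBatch):

class Batch:
    def token_idx(self):
        # First binding of the name in the class body.
        return 0

    # Without the noqa directive below, flake8 reports F811 here:
    # "redefinition of unused name 'token_idx'".
    @property
    def token_idx(self):  # noqa: F811
        return 1

print(Batch().token_idx)  # prints 1; the later property binding wins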
@@ -1534,8 +1534,8 @@ class VlmCausalLM(Model):
 
         except Exception:
             raise RuntimeError(
-                f"Not enough memory to handle following prefill and decode warmup."
-                f"You need to decrease `--max-batch-prefill-tokens`"
+                "Not enough memory to handle following prefill and decode warmup."
+                "You need to decrease `--max-batch-prefill-tokens`"
             )
 
         mem_stats = get_hpu_memory_stats(self.device)
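
This hunk drops the f prefixes because neither literal contains a {placeholder}, which flake8 flags as F541 ("f-string is missing placeholders"). The two adjacent literals are still joined by Python's implicit string concatenation, which this small self-contained sketch reproduces (the message text is taken from the diff above):

# Adjacent string literals are concatenated at compile time, so the two
# sentences form a single message (note: with no space between them,
# exactly as in the diff above).
message = (
    "Not enough memory to handle following prefill and decode warmup."
    "You need to decrease `--max-batch-prefill-tokens`"
)
print(message)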