From 4ee34f64c6dee4ef85fcfe5610f00a51287b9dc4 Mon Sep 17 00:00:00 2001
From: regisss <15324346+regisss@users.noreply.github.com>
Date: Sat, 10 May 2025 17:04:32 +0000
Subject: [PATCH] Make style 2

---
 .../server/text_generation_server/models/vlm_causal_lm.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py b/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py
index 6d0f920e..0e37609e 100644
--- a/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py
+++ b/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py
@@ -232,7 +232,7 @@ class VlmCausalLMBatch(CausalLMBatch):
         self.prefilling = prefilling
 
     @property
-    def token_idx(self):
+    def token_idx(self):  # noqa: F811
         if self.prefilling:
             # no right padding for prefill
             token_idx_scalar = self.attention_mask.shape[-1] - 1
@@ -1534,8 +1534,8 @@ class VlmCausalLM(Model):
         except Exception:
             raise RuntimeError(
-                f"Not enough memory to handle following prefill and decode warmup."
-                f"You need to decrease `--max-batch-prefill-tokens`"
+                "Not enough memory to handle following prefill and decode warmup."
+                "You need to decrease `--max-batch-prefill-tokens`"
             )
 
         mem_stats = get_hpu_memory_stats(self.device)