From 4ee34f64c6dee4ef85fcfe5610f00a51287b9dc4 Mon Sep 17 00:00:00 2001
From: regisss <15324346+regisss@users.noreply.github.com>
Date: Sat, 10 May 2025 17:04:32 +0000
Subject: [PATCH] Make style 2

---
 .../server/text_generation_server/models/vlm_causal_lm.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py b/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py
index 6d0f920e..0e37609e 100644
--- a/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py
+++ b/backends/gaudi/server/text_generation_server/models/vlm_causal_lm.py
@@ -232,7 +232,7 @@ class VlmCausalLMBatch(CausalLMBatch):
         self.prefilling = prefilling
 
     @property
-    def token_idx(self):
+    def token_idx(self):  # noqa: F811
         if self.prefilling:
             # no right padding for prefill
             token_idx_scalar = self.attention_mask.shape[-1] - 1
@@ -1534,8 +1534,8 @@ class VlmCausalLM(Model):
         except Exception:
             raise RuntimeError(
-                f"Not enough memory to handle following prefill and decode warmup."
-                f"You need to decrease `--max-batch-prefill-tokens`"
+                "Not enough memory to handle following prefill and decode warmup."
+                "You need to decrease `--max-batch-prefill-tokens`"
             )
 
         mem_stats = get_hpu_memory_stats(self.device)