minor fix

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
2025-09-09 19:34:53 +00:00 · 2025-07-01 00:56:50 -07:00 · 2025-07-01 00:56:50 -07:00 · 6382b4261c
commit 6382b4261c
parent 7c8694545f
1 changed files with 2 additions and 3 deletions
--- a/server/text_generation_server/layers/lora.py
+++ b/server/text_generation_server/layers/lora.py
@@ -51,9 +51,8 @@ class LoraLinear(nn.Module):
            return result
        data: Optional["BatchLoraWeights"] = adapter_data.data.get(layer_type)
-        if (
+        if data is not None and (
-            data is not None
+            SYSTEM == "ipex"
-            and SYSTEM == "ipex"
            or (punica_sgmv is not None and data.can_vectorize(self.process_group))
        ):
            # In tensor-parallel configurations, each GPU processes a specific segment of the output.