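minor fix: parenthesize the LoRA dispatch condition so that `data is not None` guards both the ipex branch and the punica_sgmv branch (previously `and` bound tighter than `or`, so `data.can_vectorize(...)` could be evaluated when `data` was None)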

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
2025-09-08 19:04:52 +00:00 · 2025-07-01 00:56:50 -07:00 · 2025-07-01 00:56:50 -07:00 · 6382b4261c
commit 6382b4261c
parent 7c8694545f
1 changed file with 2 additions and 3 deletions
--- a/server/text_generation_server/layers/lora.py
+++ b/server/text_generation_server/layers/lora.py
@@ -51,9 +51,8 @@ class LoraLinear(nn.Module):
            return result
        data: Optional["BatchLoraWeights"] = adapter_data.data.get(layer_type)

-        if (
-            data is not None
-            and SYSTEM == "ipex"
+        if data is not None and (
+            SYSTEM == "ipex"
            or (punica_sgmv is not None and data.can_vectorize(self.process_group))
        ):
            # In tensor-parallel configurations, each GPU processes a specific segment of the output.