From 6382b4261c07216f78574615d705a4b29c89fe92 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Tue, 1 Jul 2025 00:56:50 -0700 Subject: [PATCH] Fix and/or precedence so `data is not None` guards data.can_vectorize() in LoraLinear Signed-off-by: Wang, Yi A --- server/text_generation_server/layers/lora.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server/text_generation_server/layers/lora.py b/server/text_generation_server/layers/lora.py index dc5072bb..daac638c 100644 --- a/server/text_generation_server/layers/lora.py +++ b/server/text_generation_server/layers/lora.py @@ -51,9 +51,8 @@ class LoraLinear(nn.Module): return result data: Optional["BatchLoraWeights"] = adapter_data.data.get(layer_type) - if ( - data is not None - and SYSTEM == "ipex" + if data is not None and ( + SYSTEM == "ipex" or (punica_sgmv is not None and data.can_vectorize(self.process_group)) ): # In tensor-parallel configurations, each GPU processes a specific segment of the output.