From d7a24c03cfb466cdb563bfa129e3d738b7b2d73f Mon Sep 17 00:00:00 2001 From: "Wang, Yi" Date: Tue, 25 Feb 2025 19:07:55 +0800 Subject: [PATCH] some minor fix (#3048) Signed-off-by: Wang, Yi A --- server/text_generation_server/layers/gptq/__init__.py | 1 + server/text_generation_server/layers/moe/unquantized.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/server/text_generation_server/layers/gptq/__init__.py b/server/text_generation_server/layers/gptq/__init__.py index 7e838035..25387682 100644 --- a/server/text_generation_server/layers/gptq/__init__.py +++ b/server/text_generation_server/layers/gptq/__init__.py @@ -171,6 +171,7 @@ class GPTQWeightsLoader(WeightsLoader): g_idx=g_idx, bits=self.bits, groupsize=self.groupsize, + use_awq_kernel=self.quantize == "awq", use_exllama=use_exllama, ) diff --git a/server/text_generation_server/layers/moe/unquantized.py b/server/text_generation_server/layers/moe/unquantized.py index 77214286..007f99d0 100644 --- a/server/text_generation_server/layers/moe/unquantized.py +++ b/server/text_generation_server/layers/moe/unquantized.py @@ -85,6 +85,8 @@ class UnquantizedSparseMoELayer(nn.Module): use_grouped_topk=self.n_expert_group is not None, num_expert_group=self.n_expert_group, topk_group=self.topk_group, + scoring_func=self.scoring_func, + e_score_correction_bias=self.e_score_correction_bias, ) return fused_moe( x,