mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-07-27 18:30:16 +00:00
some minor fix
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
06dfe9abfe
commit
adadc41f56
server/text_generation_server/layers
@ -171,6 +171,7 @@ class GPTQWeightsLoader(WeightsLoader):
|
||||
g_idx=g_idx,
|
||||
bits=self.bits,
|
||||
groupsize=self.groupsize,
|
||||
use_awq_kernel=self.quantize == "awq",
|
||||
use_exllama=use_exllama,
|
||||
)
|
||||
|
||||
|
@ -85,6 +85,8 @@ class UnquantizedSparseMoELayer(nn.Module):
|
||||
use_grouped_topk=self.n_expert_group is not None,
|
||||
num_expert_group=self.n_expert_group,
|
||||
topk_group=self.topk_group,
|
||||
scoring_func=self.scoring_func,
|
||||
e_score_correction_bias=self.e_score_correction_bias,
|
||||
)
|
||||
return fused_moe(
|
||||
x,
|
||||
|
Loading…
Reference in New Issue
Block a user