mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
parent
cea9dbc971
commit
d7a24c03cf
@ -171,6 +171,7 @@ class GPTQWeightsLoader(WeightsLoader):
|
|||||||
g_idx=g_idx,
|
g_idx=g_idx,
|
||||||
bits=self.bits,
|
bits=self.bits,
|
||||||
groupsize=self.groupsize,
|
groupsize=self.groupsize,
|
||||||
|
use_awq_kernel=self.quantize == "awq",
|
||||||
use_exllama=use_exllama,
|
use_exllama=use_exllama,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -85,6 +85,8 @@ class UnquantizedSparseMoELayer(nn.Module):
|
|||||||
use_grouped_topk=self.n_expert_group is not None,
|
use_grouped_topk=self.n_expert_group is not None,
|
||||||
num_expert_group=self.n_expert_group,
|
num_expert_group=self.n_expert_group,
|
||||||
topk_group=self.topk_group,
|
topk_group=self.topk_group,
|
||||||
|
scoring_func=self.scoring_func,
|
||||||
|
e_score_correction_bias=self.e_score_correction_bias,
|
||||||
)
|
)
|
||||||
return fused_moe(
|
return fused_moe(
|
||||||
x,
|
x,
|
||||||
|
Loading…
Reference in New Issue
Block a user