diff --git a/server/text_generation_server/layers/moe/gptq_marlin.py b/server/text_generation_server/layers/moe/gptq_marlin.py index d909f397..014a90dc 100644 --- a/server/text_generation_server/layers/moe/gptq_marlin.py +++ b/server/text_generation_server/layers/moe/gptq_marlin.py @@ -72,7 +72,7 @@ class GPTQMarlinSparseMoELayer(nn.Module): scoring_func: Optional[str] = None, e_score_correction_bias: Optional[float] = None, ): - assert scoring_func is None, "scoring func is not handled" + assert scoring_func == "softmax", f"scoring func {scoring_func} is not handled" assert e_score_correction_bias is None, "scoring correction bias is not handled" super().__init__()