diff --git a/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py b/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py index f1e73f46..583c4ad0 100644 --- a/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py +++ b/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py @@ -185,7 +185,6 @@ class Qwen3MoeAttention(nn.Module): seqlen=seqlen, softmax_scale=self.scaling, window_size_left=self.max_past, - num_key_value_groups=self.num_key_value_groups, ) # Decode else: