From 91c40e6c58db0c7c59563c64dfa855830773b5c2 Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Wed, 11 Jun 2025 02:34:09 +0000
Subject: [PATCH] Fix crash

Signed-off-by: yuanwu
---
 .../models/custom_modeling/flash_qwen3_moe_modeling.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py b/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py
index f1e73f46..583c4ad0 100644
--- a/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py
+++ b/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py
@@ -185,7 +185,6 @@ class Qwen3MoeAttention(nn.Module):
                 seqlen=seqlen,
                 softmax_scale=self.scaling,
                 window_size_left=self.max_past,
-                num_key_value_groups=self.num_key_value_groups,
             )
         # Decode
         else: