From 098d3133946efa6f99bc5b453e2fae42e42d9fd3 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Tue, 17 Sep 2024 19:52:42 -0700 Subject: [PATCH] hotfix: ipex fails since cuda moe kernel is not supported Signed-off-by: Wang, Yi A --- .../models/custom_modeling/flash_deepseek_v2_modeling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py index 12be08cd..328f239b 100644 --- a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py @@ -15,7 +15,6 @@ from typing import List, Optional, Tuple -from moe_kernels.fused_moe import grouped_topk import torch import torch.distributed from text_generation_server.layers import ( @@ -41,6 +40,9 @@ from torch import nn from transformers.activations import ACT2FN from transformers.configuration_utils import PretrainedConfig +if SYSTEM != "ipex": + from moe_kernels.fused_moe import grouped_topk + if SYSTEM == "rocm": try: from vllm import _custom_C