diff --git a/Dockerfile_amd b/Dockerfile_amd index 201d9010..8b7808be 100644 --- a/Dockerfile_amd +++ b/Dockerfile_amd @@ -279,9 +279,9 @@ RUN git clone https://github.com/danieldk/marlin-kernels.git && \ FROM kernel-builder AS moe-kernels WORKDIR /usr/src -ENV MOE_KERNELS_BRANCH=9fb72ccb00b8da96a41f9b9c48f305c7dbd266bc +ENV MOE_KERNELS_BRANCH=d7e042bf9f7aff10c631212fc71b24895d66eb59 ENV VLLM_TARGET_DEVICE=rocm -RUN git clone https://github.com/mht-sharma/moe-kernels.git && \ +RUN git clone https://github.com/danieldk/moe-kernels.git && \ cd moe-kernels && \ git checkout ${MOE_KERNELS_BRANCH} && \ python setup.py install diff --git a/server/text_generation_server/layers/moe/unquantized.py b/server/text_generation_server/layers/moe/unquantized.py index a104ca5d..9277384a 100644 --- a/server/text_generation_server/layers/moe/unquantized.py +++ b/server/text_generation_server/layers/moe/unquantized.py @@ -73,8 +73,6 @@ class UnquantizedSparseMoELayer(nn.Module): num_expert_group=self.n_expert_group, topk_group=self.topk_group, ) - # from loguru import logger - # logger.info("Fused MoE is used here") return fused_moe( x, w1=self.gate_up_proj,