mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Apply suggestions from code review
Co-authored-by: Mohit Sharma <mohit21sharma.ms@gmail.com>
This commit is contained in:
parent
9376066b24
commit
f56e24b346
@@ -279,9 +279,9 @@ RUN git clone https://github.com/danieldk/marlin-kernels.git && \
|
||||
|
||||
FROM kernel-builder AS moe-kernels
|
||||
WORKDIR /usr/src
|
||||
ENV MOE_KERNELS_BRANCH=9fb72ccb00b8da96a41f9b9c48f305c7dbd266bc
|
||||
ENV MOE_KERNELS_BRANCH=d7e042bf9f7aff10c631212fc71b24895d66eb59
|
||||
ENV VLLM_TARGET_DEVICE=rocm
|
||||
RUN git clone https://github.com/mht-sharma/moe-kernels.git && \
|
||||
RUN git clone https://github.com/danieldk/moe-kernels.git && \
|
||||
cd moe-kernels && \
|
||||
git checkout ${MOE_KERNELS_BRANCH} && \
|
||||
python setup.py install
|
||||
|
@@ -73,8 +73,6 @@ class UnquantizedSparseMoELayer(nn.Module):
|
||||
num_expert_group=self.n_expert_group,
|
||||
topk_group=self.topk_group,
|
||||
)
|
||||
# from loguru import logger
|
||||
# logger.info("Fused MoE is used here")
|
||||
return fused_moe(
|
||||
x,
|
||||
w1=self.gate_up_proj,
|
||||
|
Loading…
Reference in New Issue
Block a user