From a1c78adc192634e05ec871270902bc556c658810 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 5 Feb 2025 11:55:17 +0100 Subject: [PATCH] Revert dummy modifications. --- Dockerfile_amd | 34 ++++++++++++++++++++++++++++++---- Dockerfile_intel | 31 ++++++++++++++++++------------- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/Dockerfile_amd b/Dockerfile_amd index 92a9fed7..79d983a7 100644 --- a/Dockerfile_amd +++ b/Dockerfile_amd @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -104,7 +104,7 @@ RUN case ${TARGETPLATFORM} in \ /opt/conda/bin/conda clean -ya # Install flash-attention, torch dependencies -RUN python3 -m pip install --upgrade pip && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/* +RUN python3 -m pip install --upgrade pip uv && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/* RUN conda install mkl=2021 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:/opt/conda/lib/python3.11/site-packages/torch/lib:/opt/conda/lib/ @@ -234,6 +234,7 @@ FROM kernel-builder AS vllm-builder WORKDIR /usr/src COPY server/Makefile-vllm Makefile +RUN pip install setuptools_scm # Build specific version of vllm RUN make build-vllm-rocm @@ -267,6 +268,24 @@ COPY server/exllamav2_kernels/ . RUN python setup.py build +FROM kernel-builder AS marlin-kernels +WORKDIR /usr/src +ENV MARLIN_KERNELS_BRANCH=v0.3.6 +ENV VLLM_TARGET_DEVICE=rocm +RUN git clone https://github.com/danieldk/marlin-kernels.git && \ + cd marlin-kernels && \ + git checkout ${MARLIN_KERNELS_BRANCH} && \ + python setup.py install + +FROM kernel-builder AS moe-kernels +WORKDIR /usr/src +ENV MOE_KERNELS_BRANCH=v0.8.2 +ENV VLLM_TARGET_DEVICE=rocm +RUN git clone https://github.com/danieldk/moe-kernels.git && \ + cd moe-kernels && \ + git checkout ${MOE_KERNELS_BRANCH} && \ + python setup.py install + FROM install_deps AS base-copy # Text Generation Inference base env @@ -289,14 +308,21 @@ COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 # Copy build artifacts from exllamav2 kernels builder COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages +# Copy build artifacts from marlin kernels +COPY --from=marlin-kernels /usr/src/marlin-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages + +# Copy build artifacts from moe kernels +COPY --from=moe-kernels /usr/src/moe-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages + # Install server COPY proto proto COPY server server COPY server/Makefile server/Makefile +ENV UV_SYSTEM_PYTHON=1 RUN cd server && \ make gen-server && \ - pip install -r requirements_rocm.txt --no-cache-dir - # pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir + pip install -U pip uv && \ + uv pip install -r requirements_rocm.txt --no-cache-dir # Install benchmarker COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark diff --git a/Dockerfile_intel b/Dockerfile_intel index 62fad650..9af43604 100644 --- a/Dockerfile_intel +++ b/Dockerfile_intel @@ -1,6 +1,6 @@ ARG PLATFORM=xpu -FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -97,11 +97,10 @@ ENV HF_HOME=/data \ WORKDIR /usr/src -RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir -RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir -RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir -RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp311-cp311-linux_x86_64.whl --no-cache-dir -RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir +RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir +RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir +RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir +RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir RUN pip install triton-xpu==3.0.0b2 --no-cache-dir @@ -109,15 +108,20 @@ RUN pip install triton-xpu==3.0.0b2 --no-cache-dir COPY proto proto COPY server server COPY server/Makefile server/Makefile +ENV UV_SYSTEM_PYTHON=1 RUN cd server && \ make gen-server && \ - pip install -r requirements_intel.txt && \ - pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir + pip install -U pip uv && \ + uv pip install -r requirements_intel.txt --no-cache-dir ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib ENV CCL_ZE_IPC_EXCHANGE=sockets #ENV TORCH_LLM_ALLREDUCE=1 #ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0 +ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0 + +RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout 1ccf72b2d11cd00b47aef6d6cd054c088aa6f083 +RUN cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc,ats-m150' BUILD_SEPARATE_OPS=OFF BUILD_WITH_CPU=OFF USE_XETLA=ON python setup.py install && rm -rf /usr/src/intel-extension-for-pytorch # Install benchmarker COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark @@ -209,10 +213,11 @@ ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/" COPY proto proto COPY server server COPY server/Makefile server/Makefile +ENV UV_SYSTEM_PYTHON=1 RUN cd server && \ make gen-server && \ - uv pip install -r requirements_intel.txt --no-cache-dir - # pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir + pip install -U pip uv && \ + uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir # Install benchmarker COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark @@ -222,9 +227,9 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher FROM ${PLATFORM} AS final -ENV ATTENTION=paged -ENV PREFIX_CACHING=0 -ENV PREFILL_CHUNKING=0 +ENV ATTENTION=flashdecoding-ipex +ENV PREFIX_CACHING=1 +ENV PREFILL_CHUNKING=1 ENV CUDA_GRAPHS=0 ENTRYPOINT ["text-generation-launcher"] CMD ["--json-output"]