From cd313364a04c7efd2dde3c0158688df3953d3f0c Mon Sep 17 00:00:00 2001
From: fxmarty <9808326+fxmarty@users.noreply.github.com>
Date: Thu, 9 May 2024 22:14:24 +0000
Subject: [PATCH] add debug dockerfile

---
 Dockerfile_amd                  |  21 ++--
 Dockerfile_amd_nightly_no_patch | 199 ++++++++++++++++++++++++++++++++
 2 files changed, 210 insertions(+), 10 deletions(-)
 create mode 100644 Dockerfile_amd_nightly_no_patch

diff --git a/Dockerfile_amd b/Dockerfile_amd
index f1c68674..f6dffac5 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -109,7 +109,7 @@ ARG BUILD_CAFFE2="0" \
     USE_NNPACK="0" \
     USE_QNNPACK="0" \
     USE_XNNPACK="0" \
-    USE_FLASH_ATTENTION="0" \
+    USE_FLASH_ATTENTION="1" \
     USE_MEM_EFF_ATTENTION="0"
 
 RUN cd pytorch && python tools/amd_build/build_amd.py && python setup.py install
@@ -184,10 +184,6 @@ COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-31
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-RUN cd server && \
-    make gen-server && \
-    pip install -r requirements_rocm.txt && \
-    pip install ".[accelerate, peft, outlines]" --no-cache-dir
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -196,6 +192,11 @@ COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bi
 # Install launcher
 COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
 
+RUN cd server && \
+    make gen-server && \
+    pip install -r requirements_rocm.txt
+    #pip install ".[accelerate, peft, outlines]" --no-cache-dir
+
 # AWS Sagemaker compatible image
 FROM base as sagemaker
 
@@ -205,10 +206,10 @@ RUN chmod +x entrypoint.sh
 ENTRYPOINT ["./entrypoint.sh"]
 
 # Final image
-FROM base
+FROM base-copy
 
-COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
-RUN chmod +x /tgi-entrypoint.sh
+# COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
+# RUN chmod +x /tgi-entrypoint.sh
 
-ENTRYPOINT ["/tgi-entrypoint.sh"]
-CMD ["--json-output"]
+# ENTRYPOINT ["/tgi-entrypoint.sh"]
+# CMD ["--json-output"]
diff --git a/Dockerfile_amd_nightly_no_patch b/Dockerfile_amd_nightly_no_patch
new file mode 100644
index 00000000..9d99fc16
--- /dev/null
+++ b/Dockerfile_amd_nightly_no_patch
@@ -0,0 +1,199 @@
+# Rust builder
+FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
+WORKDIR /usr/src
+
+ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
+
+FROM chef as planner
+COPY Cargo.toml Cargo.toml
+COPY rust-toolchain.toml rust-toolchain.toml
+COPY proto proto
+COPY benchmark benchmark
+COPY router router
+COPY launcher launcher
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
+
+ARG GIT_SHA
+ARG DOCKER_LABEL
+
+RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
+    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
+    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
+    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
+    rm -f $PROTOC_ZIP
+
+COPY --from=planner /usr/src/recipe.json recipe.json
+RUN cargo chef cook --release --recipe-path recipe.json
+
+COPY Cargo.toml Cargo.toml
+COPY rust-toolchain.toml rust-toolchain.toml
+COPY proto proto
+COPY benchmark benchmark
+COPY router router
+COPY launcher launcher
+RUN cargo build --release
+
+# Text Generation Inference base image for RoCm
+FROM rocm/dev-ubuntu-22.04:6.1 as base
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    ccache \
+    curl \
+    git \
+    make \
+    libssl-dev \
+    g++ \
+    # Needed to build VLLM & flash.
+    rocthrust-dev \
+    hipsparse-dev \
+    hipblas-dev \
+    hipblaslt-dev \
+    rocblas-dev \
+    hiprand-dev \
+    rocrand-dev \
+    miopen-hip-dev \
+    hipfft-dev \
+    hipcub-dev \
+    hipsolver-dev \
+    rccl-dev \
+    cmake \
+    python3-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# Keep in sync with `server/pyproject.toml`
+ARG MAMBA_VERSION=23.1.0-1
+ARG PYTORCH_VERSION='2.3.0'
+ARG ROCM_VERSION='6.0.2'
+ARG PYTHON_VERSION='3.10.10'
+# Automatically set by buildx
+ARG TARGETPLATFORM
+ENV PATH /opt/conda/bin:$PATH
+
+# TGI seems to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
+# Install mamba
+# translating Docker's TARGETPLATFORM into mamba arches
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
+RUN chmod +x ~/mambaforge.sh && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    mamba init && \
+    rm ~/mambaforge.sh
+
+# Install flash-attention, torch dependencies
+RUN pip install numpy einops ninja --no-cache-dir
+
+RUN conda install intel::mkl-static intel::mkl-include
+
+RUN pip install --pre torch==2.4.0.dev20240506 --index-url https://download.pytorch.org/whl/nightly/rocm6.1
+
+RUN pip uninstall -y triton && \
+    git clone --depth 1 --single-branch https://github.com/ROCm/triton.git && \
+    cd triton/python && \
+    pip install .
+
+# Set as recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
+# Disabled for now as it is currently not stable with ROCm 6.1.
+# ENV HIP_FORCE_DEV_KERNARG=1
+
+FROM base AS kernel-builder
+
+# Build vllm kernels
+FROM kernel-builder AS vllm-builder
+WORKDIR /usr/src
+
+COPY server/Makefile-vllm Makefile
+
+# Build specific version of vllm
+RUN make build-vllm-rocm
+
+# Build Flash Attention v2 kernels
+FROM kernel-builder AS flash-att-v2-builder
+WORKDIR /usr/src
+
+COPY server/Makefile-flash-att-v2 Makefile
+
+# Build specific version of flash attention v2
+RUN make build-flash-attention-v2-rocm
+
+# Build Transformers CUDA kernels (gpt-neox and bloom)
+FROM kernel-builder as custom-kernels-builder
+WORKDIR /usr/src
+COPY server/custom_kernels/ .
+RUN python setup.py build
+
+# Build exllama kernels
+FROM kernel-builder as exllama-kernels-builder
+WORKDIR /usr/src
+COPY server/exllama_kernels/ .
+
+RUN python setup.py build
+
+# Build exllama v2 kernels
+FROM kernel-builder as exllamav2-kernels-builder
+WORKDIR /usr/src
+COPY server/exllamav2_kernels/ .
+
+RUN python setup.py build
+
+FROM base as base-copy
+
+# Text Generation Inference base env
+ENV HUGGINGFACE_HUB_CACHE=/data \
+    HF_HUB_ENABLE_HF_TRANSFER=1 \
+    PORT=80
+
+# Copy build artifacts from vllm builder
+COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
+# Copy build artifacts from flash attention v2 builder
+COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
+# Copy build artifacts from custom kernels builder
+COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
+# Copy build artifacts from exllama kernels builder
+COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
+# Copy build artifacts from exllamav2 kernels builder
+COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
+# Install server
+COPY proto proto
+COPY server server
+COPY server/Makefile server/Makefile
+
+# Install benchmarker
+COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
+# Install router
+COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
+# Install launcher
+COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
+
+RUN cd server && \
+    make gen-server && \
+    pip install -r requirements_rocm.txt
+    #pip install ".[accelerate, peft, outlines]" --no-cache-dir
+
+# AWS Sagemaker compatible image
+FROM base as sagemaker
+
+COPY sagemaker-entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
+
+# Final image
+FROM base-copy
+
+# COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
+# RUN chmod +x /tgi-entrypoint.sh
+
+# ENTRYPOINT ["/tgi-entrypoint.sh"]
+# CMD ["--json-output"]
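
A minimal usage sketch, assuming the patch is applied at the root of the text-generation-inference checkout (the tgi-rocm-debug tag and the ROCm device flags below are illustrative, not part of the patch). Because the final stage leaves its ENTRYPOINT commented out, the debug image can be built and entered interactively:

    # build the nightly-torch debug image from the new Dockerfile
    docker build -f Dockerfile_amd_nightly_no_patch -t tgi-rocm-debug .
    # drop into a shell with the AMD GPU devices exposed
    docker run --rm -it --device=/dev/kfd --device=/dev/dri tgi-rocm-debug bash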