Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 12:24:53 +00:00)
add debug dockerfile
commit cd313364a0
parent 64e65ba3a1
@@ -109,7 +109,7 @@ ARG BUILD_CAFFE2="0" \
 USE_NNPACK="0" \
 USE_QNNPACK="0" \
 USE_XNNPACK="0" \
-USE_FLASH_ATTENTION="0" \
+USE_FLASH_ATTENTION="1" \
 USE_MEM_EFF_ATTENTION="0"

 RUN cd pytorch && python tools/amd_build/build_amd.py && python setup.py install
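This hunk flips USE_FLASH_ATTENTION from "0" to "1" so the from-source ROCm PyTorch build compiles the flash attention kernels. One rough smoke test inside the resulting image (a sketch; it reports the runtime SDP toggle, which does not by itself prove the kernels were compiled in):

    python -c "import torch; print(torch.backends.cuda.flash_sdp_enabled())"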
@@ -184,10 +184,6 @@ COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-31
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-RUN cd server && \
-    make gen-server && \
-    pip install -r requirements_rocm.txt && \
-    pip install ".[accelerate, peft, outlines]" --no-cache-dir

 # Install benchmarker
 COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -196,6 +192,11 @@ COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bi
 # Install launcher
 COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher

+RUN cd server && \
+    make gen-server && \
+    pip install -r requirements_rocm.txt
+    #pip install ".[accelerate, peft, outlines]" --no-cache-dir
+
 # AWS Sagemaker compatible image
 FROM base as sagemaker

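Together with the removal above, this moves the server install after the benchmarker, router, and launcher copies and comments out the `pip install ".[accelerate, peft, outlines]"` step, so the Python server package is no longer baked into the image. A sketch of finishing the install by hand in a running container (the image tag is a placeholder):

    docker run --rm -it tgi-rocm-debug bash
    # inside the container, from the image's working directory:
    cd server && pip install ".[accelerate, peft, outlines]" --no-cache-dir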
@@ -205,10 +206,10 @@ RUN chmod +x entrypoint.sh
 ENTRYPOINT ["./entrypoint.sh"]

 # Final image
-FROM base
+FROM base-copy

-COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
-RUN chmod +x /tgi-entrypoint.sh
+# COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
+# RUN chmod +x /tgi-entrypoint.sh

-ENTRYPOINT ["/tgi-entrypoint.sh"]
-CMD ["--json-output"]
+# ENTRYPOINT ["/tgi-entrypoint.sh"]
+# CMD ["--json-output"]
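With the entrypoint lines commented out, the final stage inherits the base image's default command instead of launching TGI. A debug session is then a plain interactive run (device flags are the usual ROCm conventions, not taken from the commit; the tag is a placeholder):

    docker run --rm -it --device=/dev/kfd --device=/dev/dri tgi-rocm-debug bash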
Dockerfile_amd_nightly_no_patch (new file, 199 lines)
@@ -0,0 +1,199 @@
# Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

FROM chef as planner
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

ARG GIT_SHA
ARG DOCKER_LABEL

RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

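# Note: cargo-chef splits the Rust build in two. `cargo chef prepare` (planner
# stage) records the dependency graph in recipe.json, and `cargo chef cook`
# above compiles only those dependencies, so this layer stays cached when only
# the workspace sources copied below change.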
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher
RUN cargo build --release

# Text Generation Inference base image for ROCm
FROM rocm/dev-ubuntu-22.04:6.1 as base

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    ccache \
    curl \
    git \
    make \
    libssl-dev \
    g++ \
    # Needed to build vLLM & flash-attention.
    rocthrust-dev \
    hipsparse-dev \
    hipblas-dev \
    hipblaslt-dev \
    rocblas-dev \
    hiprand-dev \
    rocrand-dev \
    miopen-hip-dev \
    hipfft-dev \
    hipcub-dev \
    hipsolver-dev \
    rccl-dev \
    cmake \
    python3-dev && \
    rm -rf /var/lib/apt/lists/*

# Keep in sync with `server/pyproject.toml`
ARG MAMBA_VERSION=23.1.0-1
ARG PYTORCH_VERSION='2.3.0'
ARG ROCM_VERSION='6.0.2'
ARG PYTHON_VERSION='3.10.10'
# Automatically set by buildx
ARG TARGETPLATFORM
ENV PATH /opt/conda/bin:$PATH

# TGI seems to require libssl.so.1.1 instead of libssl.so.3, so we can't use Ubuntu 22.04's system Python. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
# Install mamba
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
    *) MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    mamba init && \
    rm ~/mambaforge.sh

# Install flash-attention, torch dependencies
RUN pip install numpy einops ninja --no-cache-dir

RUN conda install intel::mkl-static intel::mkl-include

RUN pip install --pre torch==2.4.0.dev20240506 --index-url https://download.pytorch.org/whl/nightly/rocm6.1

RUN pip uninstall -y triton && \
    git clone --depth 1 --single-branch https://github.com/ROCm/triton.git && \
    cd triton/python && \
    pip install .

# Set as recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
# Disabled for now as it is currently not stable with ROCm 6.1.
# ENV HIP_FORCE_DEV_KERNARG=1

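# A rough import smoke test for the nightly ROCm torch and the source-built
# triton (a hypothetical check, run manually inside the image):
#   python -c "import torch, triton; print(torch.__version__, torch.version.hip)"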
FROM base AS kernel-builder

# Build vllm kernels
FROM kernel-builder AS vllm-builder
WORKDIR /usr/src

COPY server/Makefile-vllm Makefile

# Build specific version of vllm
RUN make build-vllm-rocm

# Build Flash Attention v2 kernels
FROM kernel-builder AS flash-att-v2-builder
WORKDIR /usr/src

COPY server/Makefile-flash-att-v2 Makefile

# Build specific version of flash attention v2
RUN make build-flash-attention-v2-rocm

# Build Transformers CUDA kernels (gpt-neox and bloom)
FROM kernel-builder as custom-kernels-builder
WORKDIR /usr/src
COPY server/custom_kernels/ .
RUN python setup.py build

# Build exllama kernels
FROM kernel-builder as exllama-kernels-builder
WORKDIR /usr/src
COPY server/exllama_kernels/ .

RUN python setup.py build

# Build exllama v2 kernels
FROM kernel-builder as exllamav2-kernels-builder
WORKDIR /usr/src
COPY server/exllamav2_kernels/ .

RUN python setup.py build

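# Each kernel above is compiled in its own throwaway stage; only the
# build/lib.linux-x86_64-cpython-310 artifacts are copied into the runtime
# image below, so the final layers carry no extra build toolchain.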
FROM base as base-copy

# Text Generation Inference base env
ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

# Copy build artifacts from vllm builder
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages

# Copy build artifacts from flash attention v2 builder
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages

# Copy build artifacts from custom kernels builder
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages

# Copy build artifacts from exllama kernels builder
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages

# Copy build artifacts from exllamav2 kernels builder
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile

# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher

RUN cd server && \
    make gen-server && \
    pip install -r requirements_rocm.txt
    #pip install ".[accelerate, peft, outlines]" --no-cache-dir

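# The editable install of the server package is deliberately left commented out
# in this debug image. A sketch of finishing it by hand in a running container:
#   cd server && pip install ".[accelerate, peft, outlines]" --no-cache-dir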
# AWS Sagemaker compatible image
FROM base as sagemaker

COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

# Final image
FROM base-copy

# COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
# RUN chmod +x /tgi-entrypoint.sh

# ENTRYPOINT ["/tgi-entrypoint.sh"]
# CMD ["--json-output"]
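As with the patched Dockerfile above, the final stage declares no ENTRYPOINT or CMD, so the image drops into a shell for debugging. A sketch of building it (the tag is a placeholder):

    docker build -f Dockerfile_amd_nightly_no_patch -t tgi-rocm-nightly-debug .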