# Rust builder: parent stage providing the Rust 1.75 toolchain with cargo-chef.
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
WORKDIR /usr/src

# Make cargo use the sparse protocol when talking to the crates.io registry.
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
# Planner: copies the workspace manifests and crates, then lets cargo-chef
# write recipe.json for them.
FROM chef AS planner
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json
# Builder: compiles the Rust workspace (benchmark, router, launcher) in release mode.
FROM chef AS builder

# Install protoc 21.12 (the binary and its bundled include files) into /usr/local.
# -f makes curl fail on an HTTP error instead of saving the error page as the archive.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -fOL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

# Build the dependencies listed in the planner's recipe before copying the
# sources, so this layer can be reused while only workspace code changes.
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

# Declared only after the dependency layers: a build argument is part of the
# cache key of every RUN that follows it, and these change on every commit.
ARG GIT_SHA
ARG DOCKER_LABEL

COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher
RUN cargo build --release
# Text Generation Inference base image for ROCm
FROM rocm/dev-ubuntu-22.04:6.0.2 AS base

# System build tooling plus ROCm development packages.
# rocthrust-dev and the packages listed after it are needed to build vLLM & flash attention.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    ccache \
    curl \
    git \
    make \
    libssl-dev \
    g++ \
    rocthrust-dev \
    hipsparse-dev \
    hipblas-dev \
    hipblaslt-dev \
    rocblas-dev \
    hiprand-dev \
    rocrand-dev \
    miopen-hip-dev \
    hipfft-dev \
    hipcub-dev \
    hipsolver-dev \
    rccl-dev \
    cmake \
    python3-dev && \
    rm -rf /var/lib/apt/lists/*
# Keep in sync with `server/pyproject.toml`
ARG MAMBA_VERSION=23.1.0-1
# NOTE(review): PYTORCH_VERSION, ROCM_VERSION and PYTHON_VERSION are not
# referenced by any instruction visible in this file — confirm they are still needed.
ARG PYTORCH_VERSION='2.3.0'
ARG ROCM_VERSION='6.0.2'
ARG PYTHON_VERSION='3.10.10'

# Automatically set by buildx
ARG TARGETPLATFORM

# Put the conda installation first on PATH (key=value form; the
# space-separated `ENV key value` form is deprecated).
ENV PATH=/opt/conda/bin:$PATH
# NOTE(review): this stage is built FROM an Ubuntu 22.04 image, which
# contradicts the original note kept below — confirm whether it still applies.
# TGI seems to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
#
# Install mamba, translating Docker's TARGETPLATFORM into mamba arches.
# Download, install and removal happen in one layer so the installer script is
# not kept in the image. The installer is written to the single path given by
# -o; it is run through bash, so it does not need the executable bit.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") MAMBA_ARCH=aarch64 ;; \
        *) MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    mamba init && \
    rm ~/mambaforge.sh
# Install PyTorch 2.3 RC compiled against ROCm 6.0 (from the PyTorch "test" wheel index).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir torch numpy --index-url https://download.pytorch.org/whl/test/rocm6.0
# Parent stage for the kernel build stages; adds nothing on top of `base`.
FROM base AS kernel-builder

# Build Triton
FROM kernel-builder AS triton-builder
WORKDIR /usr/src
COPY server/Makefile-triton Makefile
RUN make build-triton-rocm
# vLLM kernels: built from the project's Makefile-vllm inside /usr/src.
FROM kernel-builder AS vllm-builder
WORKDIR /usr/src
COPY server/Makefile-vllm ./Makefile
# The Makefile target builds a specific version of vllm for ROCm.
RUN make build-vllm-rocm
# Flash Attention v2 kernels: built from the project's Makefile-flash-att-v2 inside /usr/src.
FROM kernel-builder AS flash-att-v2-builder
WORKDIR /usr/src
COPY server/Makefile-flash-att-v2 ./Makefile
# The Makefile target builds a specific version of flash attention v2 for ROCm.
RUN make build-flash-attention-v2-rocm
# Build the custom Transformers kernels (gpt-neox and bloom).
# PYTORCH_ROCM_ARCH is set to gfx90a and gfx942 for this build only.
FROM kernel-builder AS custom-kernels-builder
WORKDIR /usr/src
COPY server/custom_kernels/ .
RUN PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build
# Build exllama kernels.
# PYTORCH_ROCM_ARCH is set to gfx90a and gfx942 for this build only.
FROM kernel-builder AS exllama-kernels-builder
WORKDIR /usr/src
COPY server/exllama_kernels/ .
RUN PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build
# Build exllama v2 kernels.
# PYTORCH_ROCM_ARCH is set to gfx90a and gfx942 for this build only.
FROM kernel-builder AS exllamav2-kernels-builder
WORKDIR /usr/src
COPY server/exllamav2_kernels/ .
RUN PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build
# Runtime stage shared by the images defined below it.
FROM base AS base-copy

# Text Generation Inference base env
ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80 \
    HIP_FORCE_DEV_KERNARG=1
# Copy the compiled extension modules from each kernel build stage into
# conda's Python 3.10 site-packages.
# NOTE(review): the source paths hard-code `linux-x86_64` and `cpython-310`,
# while the mamba install step also has a linux/arm64 branch — confirm whether
# non-x86_64 builds are expected to work.

# Copy build artifacts from triton builder
COPY --from=triton-builder /usr/src/triton/python/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from vllm builder
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from flash attention v2 builder
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from custom kernels builder
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from exllama kernels builder
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from exllamav2 kernels builder
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Install flash-attention dependencies
RUN pip install einops --no-cache-dir

# Copy the server sources.
# `COPY server server` already brings in server/Makefile, so no separate copy
# of that file is needed.
COPY proto proto
COPY server server

# NOTE(review): installing the server package itself is currently disabled;
# the original commands are kept below for reference — confirm whether this is intended.
# pip install -r requirements_rocm.txt && \
# pip install ".[accelerate, peft, outlines]" --no-cache-dir
# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher

# Run the `gen-server` Makefile target and install the ROCm Python requirements.
# `cd` is used instead of WORKDIR so that the working directory of later
# instructions and child stages is left unchanged.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN cd server && \
    make gen-server && \
    pip install --no-cache-dir -r requirements_rocm.txt
# AWS Sagemaker compatible image: same contents as base-copy, started through
# the SageMaker entrypoint script.
FROM base-copy AS sagemaker
COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
# Final image
FROM base-copy
# NOTE(review): the launcher entrypoint is currently disabled, so this stage
# defines no ENTRYPOINT/CMD of its own — confirm this is intended before release.
# ENTRYPOINT ["text-generation-launcher"]
# CMD ["--json-output"]