mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 14:22:08 +00:00
update dockerfile
This commit is contained in:
parent
54d15462dc
commit
c553a9cd77
360
Dockerfile_amd
360
Dockerfile_amd
@ -41,303 +41,245 @@ COPY backends backends
|
|||||||
COPY launcher launcher
|
COPY launcher launcher
|
||||||
RUN cargo build --profile release-opt --frozen
|
RUN cargo build --profile release-opt --frozen
|
||||||
|
|
||||||
# Text Generation Inference base image for ROCm
|
FROM rocm/dev-ubuntu-22.04:6.3.1-complete AS base
|
||||||
FROM rocm/dev-ubuntu-22.04:6.2 AS base
|
|
||||||
|
|
||||||
|
ARG HIPBLASLT_BRANCH="4d40e36"
|
||||||
|
ARG HIPBLAS_COMMON_BRANCH="7c1566b"
|
||||||
|
ARG LEGACY_HIPBLASLT_OPTION=
|
||||||
|
ARG RCCL_BRANCH="648a58d"
|
||||||
|
ARG RCCL_REPO="https://github.com/ROCm/rccl"
|
||||||
|
ARG TRITON_BRANCH="e5be006"
|
||||||
|
ARG TRITON_REPO="https://github.com/triton-lang/triton.git"
|
||||||
|
ARG PYTORCH_BRANCH="3a585126"
|
||||||
|
ARG PYTORCH_VISION_BRANCH="v0.19.1"
|
||||||
|
ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
|
||||||
|
ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
|
||||||
|
ARG FA_BRANCH="b7d29fb"
|
||||||
|
ARG FA_REPO="https://github.com/ROCm/flash-attention.git"
|
||||||
|
ARG AITER_BRANCH="21d47a9"
|
||||||
|
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
||||||
|
|
||||||
|
ENV PATH=/opt/rocm/llvm/bin:$PATH
|
||||||
|
ENV ROCM_PATH=/opt/rocm
|
||||||
|
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib:
|
||||||
|
ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942
|
||||||
|
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
|
||||||
|
|
||||||
|
ARG PYTHON_VERSION=3.11
|
||||||
|
|
||||||
|
RUN mkdir -p /app
|
||||||
|
WORKDIR /app
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
# Install Python and other dependencies
|
||||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
build-essential \
|
build-essential \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
ccache \
|
ccache \
|
||||||
curl \
|
curl \
|
||||||
git \
|
git \
|
||||||
make \
|
ninja-build \
|
||||||
libmsgpack-dev \
|
|
||||||
libssl-dev \
|
|
||||||
llvm-dev \
|
|
||||||
g++ \
|
|
||||||
# Needed to build VLLM & flash.
|
|
||||||
rocthrust-dev \
|
|
||||||
hipsparse-dev \
|
|
||||||
hipblas-dev \
|
|
||||||
hipcub-dev \
|
|
||||||
rocblas-dev \
|
|
||||||
hiprand-dev \
|
|
||||||
hipfft-dev \
|
|
||||||
rocrand-dev \
|
|
||||||
miopen-hip-dev \
|
|
||||||
hipsolver-dev \
|
|
||||||
rccl-dev \
|
|
||||||
cmake \
|
cmake \
|
||||||
|
software-properties-common \
|
||||||
|
python3.11-dev \
|
||||||
python3.11-venv && \
|
python3.11-venv && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Keep in sync with `server/pyproject.toml`
|
COPY --from=ghcr.io/astral-sh/uv:0.5.31 /uv /uvx /bin/
|
||||||
ARG MAMBA_VERSION=23.1.0-1
|
ENV PATH="$PATH:/root/.local/bin"
|
||||||
ARG PYTHON_VERSION='3.11.10'
|
RUN uv python install ${PYTHON_VERSION}
|
||||||
# Automatically set by buildx
|
RUN uv venv --python ${PYTHON_VERSION} && uv pip install pip setuptools packaging
|
||||||
ARG TARGETPLATFORM
|
ENV VIRTUAL_ENV=/usr/src/.venv/
|
||||||
ENV PATH=/opt/conda/bin:$PATH
|
ENV PATH="$PATH:/usr/src/.venv/bin/"
|
||||||
|
|
||||||
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
RUN . .venv/bin/activate && pip install -U packaging cmake ninja wheel setuptools pybind11 Cython
|
||||||
|
|
||||||
# TGI seems to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
|
|
||||||
# Install mamba
|
|
||||||
# translating Docker's TARGETPLATFORM into mamba arches
|
|
||||||
RUN case ${TARGETPLATFORM} in \
|
|
||||||
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
|
|
||||||
*) MAMBA_ARCH=x86_64 ;; \
|
|
||||||
esac && \
|
|
||||||
curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
|
|
||||||
RUN chmod +x ~/mambaforge.sh && \
|
|
||||||
bash ~/mambaforge.sh -b -p /opt/conda && \
|
|
||||||
mamba init && \
|
|
||||||
rm ~/mambaforge.sh
|
|
||||||
|
|
||||||
# RUN conda install intel::mkl-static intel::mkl-include
|
|
||||||
# Install pytorch
|
|
||||||
# On arm64 we exit with an error code
|
|
||||||
RUN case ${TARGETPLATFORM} in \
|
|
||||||
"linux/arm64") exit 1 ;; \
|
|
||||||
*) /opt/conda/bin/conda update -y conda && \
|
|
||||||
/opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
|
|
||||||
esac && \
|
|
||||||
/opt/conda/bin/conda clean -ya
|
|
||||||
|
|
||||||
# Install flash-attention, torch dependencies
|
|
||||||
RUN python3 -m pip install --upgrade pip uv && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
RUN conda install mkl=2021
|
|
||||||
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:/opt/conda/lib/python3.11/site-packages/torch/lib:/opt/conda/lib/
|
|
||||||
|
|
||||||
|
|
||||||
ARG COMMON_WORKDIR=/
|
|
||||||
WORKDIR ${COMMON_WORKDIR}
|
|
||||||
|
|
||||||
|
|
||||||
# Install HIPBLASLt
|
|
||||||
FROM base AS build_hipblaslt
|
FROM base AS build_hipblaslt
|
||||||
ARG HIPBLASLT_BRANCH="e6da924"
|
ARG HIPBLASLT_BRANCH
|
||||||
RUN git clone https://github.com/ROCm/hipBLASLt.git \
|
ARG HIPBLAS_COMMON_BRANCH
|
||||||
&& cd hipBLASLt \
|
# Set to "--legacy_hipblas_direct" for ROCm<=6.2
|
||||||
|
ARG LEGACY_HIPBLASLT_OPTION
|
||||||
|
RUN git clone https://github.com/ROCm/hipBLAS-common.git
|
||||||
|
RUN . .venv/bin/activate && cd hipBLAS-common \
|
||||||
|
&& git checkout ${HIPBLAS_COMMON_BRANCH} \
|
||||||
|
&& mkdir build \
|
||||||
|
&& cd build \
|
||||||
|
&& cmake .. \
|
||||||
|
&& make package \
|
||||||
|
&& dpkg -i ./*.deb
|
||||||
|
RUN git clone https://github.com/ROCm/hipBLASLt
|
||||||
|
RUN . .venv/bin/activate && cd hipBLASLt \
|
||||||
&& git checkout ${HIPBLASLT_BRANCH} \
|
&& git checkout ${HIPBLASLT_BRANCH} \
|
||||||
&& SCCACHE_IDLE_TIMEOUT=1800 ./install.sh --architecture ${PYTORCH_ROCM_ARCH} --legacy_hipblas_direct \
|
&& ./install.sh -d --architecture ${PYTORCH_ROCM_ARCH} ${LEGACY_HIPBLASLT_OPTION} \
|
||||||
&& cd build/release \
|
&& cd build/release \
|
||||||
&& make package
|
&& make package
|
||||||
|
RUN mkdir -p /app/install && cp /app/hipBLASLt/build/release/*.deb /app/hipBLAS-common/build/*.deb /app/install
|
||||||
|
|
||||||
FROM scratch AS export_hipblaslt
|
|
||||||
ARG COMMON_WORKDIR
|
|
||||||
COPY --from=build_hipblaslt ${COMMON_WORKDIR}/hipBLASLt/build/release/*.deb /
|
|
||||||
|
|
||||||
# RCCL build stages
|
|
||||||
FROM base AS build_rccl
|
FROM base AS build_rccl
|
||||||
ARG RCCL_BRANCH="rocm-6.2.0"
|
ARG RCCL_BRANCH
|
||||||
RUN git clone https://github.com/ROCm/rccl \
|
ARG RCCL_REPO
|
||||||
&& cd rccl \
|
RUN git clone ${RCCL_REPO}
|
||||||
|
RUN . .venv/bin/activate && cd rccl \
|
||||||
&& git checkout ${RCCL_BRANCH} \
|
&& git checkout ${RCCL_BRANCH} \
|
||||||
&& ./install.sh -p --amdgpu_targets ${PYTORCH_ROCM_ARCH}
|
&& ./install.sh -p --amdgpu_targets ${PYTORCH_ROCM_ARCH}
|
||||||
FROM scratch AS export_rccl
|
RUN mkdir -p /app/install && cp /app/rccl/build/release/*.deb /app/install
|
||||||
ARG COMMON_WORKDIR
|
|
||||||
COPY --from=build_rccl ${COMMON_WORKDIR}/rccl/build/release/*.deb /
|
|
||||||
|
|
||||||
# Triton build stages
|
|
||||||
FROM base AS build_triton
|
FROM base AS build_triton
|
||||||
ARG TRITON_BRANCH="e192dba"
|
ARG TRITON_BRANCH
|
||||||
ARG TRITON_REPO="https://github.com/triton-lang/triton.git"
|
ARG TRITON_REPO
|
||||||
RUN python3 -m pip install ninja cmake wheel pybind11 && git clone ${TRITON_REPO} \
|
RUN git clone ${TRITON_REPO}
|
||||||
&& cd triton \
|
RUN . .venv/bin/activate && cd triton \
|
||||||
&& git checkout ${TRITON_BRANCH} \
|
&& git checkout ${TRITON_BRANCH} \
|
||||||
&& cd python \
|
&& cd python \
|
||||||
&& python3 setup.py bdist_wheel --dist-dir=dist
|
&& python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
FROM scratch AS export_triton
|
RUN mkdir -p /app/install && cp /app/triton/python/dist/*.whl /app/install
|
||||||
ARG COMMON_WORKDIR
|
|
||||||
COPY --from=build_triton ${COMMON_WORKDIR}/triton/python/dist/*.whl /
|
|
||||||
|
|
||||||
# # AMD-SMI build stages
|
|
||||||
FROM base AS build_amdsmi
|
FROM base AS build_amdsmi
|
||||||
RUN cd /opt/rocm/share/amd_smi \
|
RUN . .venv/bin/activate && cd /opt/rocm/share/amd_smi \
|
||||||
&& pip wheel . --wheel-dir=dist
|
&& pip wheel . --wheel-dir=dist
|
||||||
FROM scratch AS export_amdsmi
|
RUN mkdir -p /app/install && cp /opt/rocm/share/amd_smi/dist/*.whl /app/install
|
||||||
COPY --from=build_amdsmi /opt/rocm/share/amd_smi/dist/*.whl /
|
|
||||||
|
|
||||||
|
FROM base AS build_pytorch
|
||||||
|
ARG PYTORCH_BRANCH
|
||||||
|
ARG PYTORCH_VISION_BRANCH
|
||||||
|
ARG PYTORCH_REPO
|
||||||
|
ARG PYTORCH_VISION_REPO
|
||||||
|
ARG FA_BRANCH
|
||||||
|
ARG FA_REPO
|
||||||
|
RUN git clone ${PYTORCH_REPO} pytorch
|
||||||
|
RUN . .venv/bin/activate && cd pytorch && git checkout ${PYTORCH_BRANCH} && \
|
||||||
|
pip install -r requirements.txt && git submodule update --init --recursive \
|
||||||
|
&& python3 tools/amd_build/build_amd.py \
|
||||||
|
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \
|
||||||
|
&& pip install dist/*.whl
|
||||||
|
RUN git clone ${PYTORCH_VISION_REPO} vision
|
||||||
|
RUN . .venv/bin/activate && cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
|
||||||
|
&& python3 setup.py bdist_wheel --dist-dir=dist \
|
||||||
|
&& pip install dist/*.whl
|
||||||
|
RUN git clone ${FA_REPO}
|
||||||
|
RUN . .venv/bin/activate && cd flash-attention \
|
||||||
|
&& git checkout ${FA_BRANCH} \
|
||||||
|
&& git submodule update --init \
|
||||||
|
&& MAX_JOBS=64 GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
|
RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
|
||||||
|
&& cp /app/vision/dist/*.whl /app/install \
|
||||||
|
&& cp /app/flash-attention/dist/*.whl /app/install
|
||||||
|
|
||||||
FROM base as build_pytorch
|
FROM base AS final
|
||||||
|
RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \
|
||||||
RUN --mount=type=bind,from=export_hipblaslt,src=/,target=/install \
|
dpkg -i /install/*deb \
|
||||||
if ls /install/*.deb; then \
|
|
||||||
dpkg -i /install/*.deb \
|
|
||||||
&& sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \
|
&& sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \
|
||||||
&& sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \
|
&& sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status
|
||||||
fi
|
RUN --mount=type=bind,from=build_rccl,src=/app/install/,target=/install \
|
||||||
|
dpkg -i /install/*deb \
|
||||||
ARG BUILD_ENVIRONMENT=pytorch-linux-jammy-rocm6.2-py3.11
|
|
||||||
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
|
||||||
|
|
||||||
# A commit to fix the output scaling factor issue in _scaled_mm
|
|
||||||
# Not yet in 2.5.0-rc1
|
|
||||||
ARG PYTORCH_BRANCH="cedc116"
|
|
||||||
ARG PYTORCH_VISION_BRANCH="v0.19.1"
|
|
||||||
ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git"
|
|
||||||
|
|
||||||
RUN git clone ${PYTORCH_REPO} pytorch \
|
|
||||||
&& cd pytorch && git checkout ${PYTORCH_BRANCH} && git submodule update --init --recursive \
|
|
||||||
&& pip install -r requirements.txt --no-cache-dir \
|
|
||||||
&& python tools/amd_build/build_amd.py \
|
|
||||||
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist
|
|
||||||
FROM scratch as export_pytorch
|
|
||||||
ARG COMMON_WORKDIR
|
|
||||||
COPY --from=build_pytorch ${COMMON_WORKDIR}/pytorch/dist/*.whl /
|
|
||||||
|
|
||||||
FROM base AS install_deps
|
|
||||||
|
|
||||||
ARG COMMON_WORKDIR
|
|
||||||
|
|
||||||
# Install hipblaslt
|
|
||||||
RUN --mount=type=bind,from=export_hipblaslt,src=/,target=/install \
|
|
||||||
if ls /install/*.deb; then \
|
|
||||||
dpkg -i /install/*.deb \
|
|
||||||
&& sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \
|
|
||||||
&& sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
RUN --mount=type=bind,from=export_rccl,src=/,target=/install \
|
|
||||||
if ls /install/*.deb; then \
|
|
||||||
dpkg -i /install/*.deb \
|
|
||||||
# RCCL needs to be installed twice
|
|
||||||
&& dpkg -i /install/*.deb \
|
|
||||||
&& sed -i 's/, rccl-dev \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status \
|
&& sed -i 's/, rccl-dev \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status \
|
||||||
&& sed -i 's/, rccl \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status; \
|
&& sed -i 's/, rccl \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status
|
||||||
fi
|
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \
|
||||||
|
. .venv/bin/activate && \
|
||||||
|
pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \
|
||||||
|
. .venv/bin/activate && \
|
||||||
|
pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
|
||||||
|
. .venv/bin/activate && \
|
||||||
|
pip install /install/*.whl
|
||||||
|
|
||||||
RUN --mount=type=bind,from=export_triton,src=/,target=/install \
|
ARG AITER_REPO
|
||||||
if ls /install/*.whl; then \
|
ARG AITER_BRANCH
|
||||||
# Preemptively uninstall to prevent pip same-version no-installs
|
RUN git clone --recursive ${AITER_REPO}
|
||||||
pip uninstall -y triton \
|
RUN . .venv/bin/activate && cd aiter \
|
||||||
&& pip install /install/*.whl; \
|
&& git checkout ${AITER_BRANCH} \
|
||||||
fi
|
&& git submodule update --init --recursive \
|
||||||
|
&& pip install -r requirements.txt \
|
||||||
|
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop && pip show aiter
|
||||||
|
|
||||||
RUN --mount=type=bind,from=export_amdsmi,src=/,target=/install \
|
RUN rm -rf /var/lib/apt/lists/*
|
||||||
# Preemptively uninstall to prevent pip same-version no-installs
|
|
||||||
pip uninstall -y amdsmi \
|
|
||||||
&& pip install /install/*.whl;
|
|
||||||
|
|
||||||
RUN --mount=type=bind,from=export_pytorch,src=/,target=/install \
|
|
||||||
if ls /install/*.whl; then \
|
|
||||||
# Preemptively uninstall to prevent pip same-version no-installs
|
|
||||||
pip uninstall -y torch torchvision \
|
|
||||||
&& pip install /install/*.whl; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
FROM install_deps AS kernel-builder
|
|
||||||
|
|
||||||
|
FROM final AS kernel-builder
|
||||||
# # Build vllm kernels
|
# # Build vllm kernels
|
||||||
FROM kernel-builder AS vllm-builder
|
FROM kernel-builder AS vllm-builder
|
||||||
WORKDIR /usr/src
|
|
||||||
|
|
||||||
COPY server/Makefile-vllm Makefile
|
COPY server/Makefile-vllm Makefile
|
||||||
RUN pip install setuptools_scm
|
RUN . .venv/bin/activate && pip install setuptools_scm
|
||||||
|
|
||||||
# Build specific version of vllm
|
# Build specific version of vllm
|
||||||
RUN make build-vllm-rocm
|
RUN . .venv/bin/activate && make build-vllm-rocm
|
||||||
|
|
||||||
# Build Flash Attention v2 kernels
|
|
||||||
FROM kernel-builder AS flash-att-v2-builder
|
|
||||||
WORKDIR /usr/src
|
|
||||||
|
|
||||||
COPY server/Makefile-flash-att-v2 Makefile
|
|
||||||
|
|
||||||
# Build specific version of flash attention v2
|
|
||||||
RUN make build-flash-attention-v2-rocm
|
|
||||||
|
|
||||||
# Build Transformers CUDA kernels (gpt-neox and bloom)
|
# Build Transformers CUDA kernels (gpt-neox and bloom)
|
||||||
FROM kernel-builder AS custom-kernels-builder
|
FROM kernel-builder AS custom-kernels-builder
|
||||||
WORKDIR /usr/src
|
|
||||||
COPY server/custom_kernels/ .
|
COPY server/custom_kernels/ .
|
||||||
RUN python setup.py build
|
RUN . .venv/bin/activate && python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
|
|
||||||
# Build exllama kernels
|
# Build exllama kernels
|
||||||
FROM kernel-builder AS exllama-kernels-builder
|
FROM kernel-builder AS exllama-kernels-builder
|
||||||
WORKDIR /usr/src
|
|
||||||
COPY server/exllama_kernels/ .
|
COPY server/exllama_kernels/ .
|
||||||
|
RUN . .venv/bin/activate && python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
RUN python setup.py build
|
|
||||||
|
|
||||||
# Build exllama v2 kernels
|
# Build exllama v2 kernels
|
||||||
FROM kernel-builder AS exllamav2-kernels-builder
|
FROM kernel-builder AS exllamav2-kernels-builder
|
||||||
WORKDIR /usr/src
|
|
||||||
COPY server/exllamav2_kernels/ .
|
COPY server/exllamav2_kernels/ .
|
||||||
|
RUN . .venv/bin/activate && python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
RUN python setup.py build
|
|
||||||
|
|
||||||
FROM kernel-builder AS marlin-kernels
|
FROM kernel-builder AS marlin-kernels
|
||||||
WORKDIR /usr/src
|
|
||||||
ENV MARLIN_KERNELS_BRANCH=v0.3.6
|
ENV MARLIN_KERNELS_BRANCH=v0.3.6
|
||||||
ENV VLLM_TARGET_DEVICE=rocm
|
ENV VLLM_TARGET_DEVICE=rocm
|
||||||
RUN git clone https://github.com/danieldk/marlin-kernels.git && \
|
RUN . .venv/bin/activate && git clone https://github.com/danieldk/marlin-kernels.git && \
|
||||||
cd marlin-kernels && \
|
cd marlin-kernels && \
|
||||||
git checkout ${MARLIN_KERNELS_BRANCH} && \
|
git checkout ${MARLIN_KERNELS_BRANCH} && \
|
||||||
python setup.py install
|
python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
|
|
||||||
FROM kernel-builder AS moe-kernels
|
FROM kernel-builder AS moe-kernels
|
||||||
WORKDIR /usr/src
|
|
||||||
ENV MOE_KERNELS_BRANCH=v0.8.2
|
ENV MOE_KERNELS_BRANCH=v0.8.2
|
||||||
ENV VLLM_TARGET_DEVICE=rocm
|
ENV VLLM_TARGET_DEVICE=rocm
|
||||||
RUN git clone https://github.com/danieldk/moe-kernels.git && \
|
RUN . .venv/bin/activate && git clone https://github.com/danieldk/moe-kernels.git && \
|
||||||
cd moe-kernels && \
|
cd moe-kernels && \
|
||||||
git checkout ${MOE_KERNELS_BRANCH} && \
|
git checkout ${MOE_KERNELS_BRANCH} && \
|
||||||
python setup.py install
|
python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
|
|
||||||
FROM install_deps AS base-copy
|
FROM final AS base-copy
|
||||||
|
|
||||||
# Text Generation Inference base env
|
# Text Generation Inference base env
|
||||||
ENV HF_HOME=/data \
|
ENV HF_HOME=/data \
|
||||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||||
PORT=80
|
PORT=80
|
||||||
|
|
||||||
# Copy build artifacts from vllm builder
|
ENV VIRTUAL_ENV=/app/.venv/
|
||||||
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
ENV PATH="$PATH:/app/.venv/bin/"
|
||||||
|
|
||||||
# Copy build artifacts from flash attention v2 builder
|
|
||||||
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
|
||||||
|
|
||||||
# Copy build artifacts from custom kernels builder
|
|
||||||
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
|
||||||
|
|
||||||
# Copy build artifacts from exllama kernels builder
|
|
||||||
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
|
||||||
|
|
||||||
# Copy build artifacts from exllamav2 kernels builder
|
|
||||||
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
|
||||||
|
|
||||||
# Copy build artifacts from marlin kernels
|
|
||||||
COPY --from=marlin-kernels /usr/src/marlin-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
|
||||||
|
|
||||||
# Copy build artifacts from moe kernels
|
|
||||||
COPY --from=moe-kernels /usr/src/moe-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
|
||||||
|
|
||||||
# Install server
|
# Install server
|
||||||
COPY proto proto
|
COPY proto proto
|
||||||
COPY server server
|
COPY server server
|
||||||
COPY server/Makefile server/Makefile
|
COPY server/Makefile server/Makefile
|
||||||
ENV UV_SYSTEM_PYTHON=1
|
|
||||||
RUN cd server && \
|
RUN cd server && \
|
||||||
pip install -U pip uv && \
|
uv pip install grpcio-tools mypy-protobuf && \
|
||||||
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
|
uv pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir && \
|
||||||
. ./.venv/bin/activate && \
|
|
||||||
make gen-server-raw
|
make gen-server-raw
|
||||||
|
RUN cp -r server/text_generation_server/pb /app/.venv/lib/python3.11/site-packages/text_generation_server/pb
|
||||||
RUN cd server && \
|
RUN cd server && \
|
||||||
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
|
|
||||||
. ./.venv/bin/activate && \
|
|
||||||
pwd && \
|
pwd && \
|
||||||
text-generation-server --help
|
text-generation-server --help
|
||||||
|
|
||||||
|
RUN --mount=type=bind,from=vllm-builder,src=/app/vllm/dist,target=/install \
|
||||||
|
uv pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=custom-kernels-builder,src=/app/dist,target=/install \
|
||||||
|
uv pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=custom-kernels-builder,src=/app/dist,target=/install \
|
||||||
|
uv pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=exllama-kernels-builder,src=/app/dist,target=/install \
|
||||||
|
uv pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=exllamav2-kernels-builder,src=/app/dist,target=/install \
|
||||||
|
uv pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=marlin-kernels,src=/app/marlin-kernels/dist,target=/install \
|
||||||
|
uv pip install /install/*.whl
|
||||||
|
RUN --mount=type=bind,from=moe-kernels,src=/app/moe-kernels/dist,target=/install \
|
||||||
|
uv pip install /install/*.whl
|
||||||
|
|
||||||
# Install benchmarker
|
# Install benchmarker
|
||||||
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||||
# Install router
|
# Install router
|
||||||
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
|
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
|
||||||
# Install launcher
|
# Install launcher
|
||||||
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||||
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
|
|
||||||
|
|
||||||
# AWS Sagemaker compatible image
|
# AWS Sagemaker compatible image
|
||||||
FROM base AS sagemaker
|
FROM base AS sagemaker
|
||||||
@ -368,4 +310,6 @@ COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
|
|||||||
RUN chmod +x /tgi-entrypoint.sh
|
RUN chmod +x /tgi-entrypoint.sh
|
||||||
|
|
||||||
ENTRYPOINT ["/tgi-entrypoint.sh"]
|
ENTRYPOINT ["/tgi-entrypoint.sh"]
|
||||||
CMD ["--json-output"]
|
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.local/share/uv/python/cpython-3.11.11-linux-x86_64-gnu/lib"
|
||||||
|
ENV PYTHONPATH=/app/.venv/lib/python3.11/site-packages
|
||||||
|
# CMD ["--json-output"]
|
||||||
|
Loading…
Reference in New Issue
Block a user