mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 13:52:07 +00:00
* feat(neuron): use AWS Neuron SDK 2.21.1 * feat(neuron): bump optimum-neuron version * feat(neuron): tag latest image for local tests * test(neuron): simplify sampling test
168 lines
5.0 KiB
Docker
168 lines
5.0 KiB
Docker
# Placeholder stage for the TGI sources.
# NOTE(review): as written, this stage only creates an empty /tgi directory and
# no other stage visible in this file copies from it -- confirm whether it is
# still needed.
# The base image tag is pinned so that rebuilds do not silently move to a new
# Alpine release through the implicit "latest" tag.
FROM alpine:3.21 AS tgi
RUN mkdir -p /tgi
# Fetch the optimum-neuron sources directly to avoid relying on pypi deployments.
# The base image tag is pinned so that rebuilds do not silently move to a new
# Alpine release through the implicit "latest" tag.
FROM alpine:3.21 AS optimum-neuron
RUN mkdir -p /optimum-neuron
# NOTE(review): the release archive is downloaded without integrity
# verification; consider `ADD --checksum=sha256:<digest>` once the expected
# digest has been recorded.
ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.1.0.tar.gz /optimum-neuron/sources.tar.gz
# Unpack the sources and delete the archive in the same step: the whole
# /optimum-neuron directory is copied into the deployment stage further down,
# so a leftover tarball would be shipped in the final image as dead weight.
RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1 \
    && rm -f /optimum-neuron/sources.tar.gz
# Build cargo components (adapted from TGI original Dockerfile)
# Note: we cannot use the cargo-chef base image as it uses python 3.11
FROM ubuntu:22.04 AS chef

# Packages required to download the Rust installer and to compile native code
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Download the rustup installer to a file before running it. Piping curl
# straight into sh would hide a failed download, because the exit status of a
# pipeline under the default shell is the one of its last command only.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh --default-toolchain 1.85.0 --profile minimal -y \
    && rm -f /tmp/rustup-init.sh
# Make cargo and rustc available to the following instructions and stages
ENV PATH="/root/.cargo/bin:${PATH}"
RUN cargo install cargo-chef --locked

WORKDIR /usr/src
FROM chef AS planner
# Planner stage: gather the manifests and source trees, then let cargo-chef
# write the dependency recipe (recipe.json) that the builder stage cooks.

# Top-level manifest (taken from the neuron backend), lock file and toolchain file
COPY backends/neuron/Cargo.toml Cargo.toml
COPY Cargo.lock rust-toolchain.toml ./

# Source trees
COPY backends backends
COPY launcher launcher
COPY proto proto
COPY router router

RUN cargo chef prepare --recipe-path recipe.json
FROM chef AS builder
# Builder stage: compile the Rust binaries in release mode.

# Extra build-time packages on top of the chef stage
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        libssl-dev \
        pkg-config \
        python3-dev \
        unzip \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Install the protoc 21.12 binary and its bundled include files under /usr/local.
# curl runs with -f so that an HTTP error aborts the step right away instead
# of saving the error page under the archive name.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -fOL "https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP" && \
    unzip -o "$PROTOC_ZIP" -d /usr/local bin/protoc && \
    unzip -o "$PROTOC_ZIP" -d /usr/local 'include/*' && \
    rm -f "$PROTOC_ZIP"

# Cook the dependencies from the planner recipe before the sources are copied,
# so that this layer can be reused when only the sources change
COPY backends/neuron/Cargo.toml Cargo.toml
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

# Copy the actual sources and build them
COPY Cargo.lock Cargo.lock
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY router router
COPY backends backends
COPY launcher launcher
RUN cargo build --release
# Python base image
# Shared parent of the python server build stage and of the deployment stage.
FROM ubuntu:22.04 AS base

# System python tooling; the apt package lists are removed in the same layer
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        python-is-python3 \
        python3-pip \
        python3-setuptools \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Replace the distribution pip with the latest release
RUN pip3 --no-cache-dir install --upgrade pip
# Python server build image
# Packages the neuron python server; the deployment stage installs the
# resulting archive from /pyserver/build/dist.
FROM base AS pyserver

# Build-time tools
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        make \
        python3-venv \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Working directory for the build
RUN install -d /pyserver
WORKDIR /pyserver

# Protocol definitions and server sources
COPY proto proto
COPY backends/neuron/server server

# Install the build requirements, then run the "package" make target
RUN pip3 install -r server/build-requirements.txt
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server package
# Neuron base image (used for deployment)
# Every pip invocation in this stage passes --no-cache-dir: this image is the
# one that gets deployed, and the pip download cache would otherwise be stored
# in its layers for no runtime benefit.
FROM base AS neuron

# Install system prerequisites
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        gnupg2 \
        wget \
        python3-dev \
        libexpat1 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Register the AWS Neuron apt repository and its signing key
RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -

# Install neuronx packages
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        aws-neuronx-dkms=2.19.64.0 \
        aws-neuronx-collectives=2.23.135.0-3e70920f2 \
        aws-neuronx-runtime-lib=2.23.112.0-9b5179492 \
        aws-neuronx-tools=2.20.204.0 \
        libxml2 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"

# Install manually torch CPU version to avoid pulling CUDA
RUN pip3 install --no-cache-dir \
        torch==2.5.1 \
        torchvision==0.20.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Install the Neuron python packages from the AWS Neuron package index
RUN pip3 install --no-cache-dir \
        neuronx-cc==2.16.372.0 \
        torch-neuronx==2.5.1.2.4.0 \
        transformers-neuronx==0.13.322 \
        neuronx-distributed==0.10.1 \
        libneuronxla==2.1.681.0 \
        --extra-index-url=https://pip.repos.neuron.amazonaws.com

# Install HuggingFace packages
RUN pip3 install --no-cache-dir \
        hf_transfer huggingface_hub

# Install optimum-neuron from the sources extracted in the optimum-neuron stage
COPY --from=optimum-neuron /optimum-neuron optimum-neuron
RUN pip3 install --no-cache-dir ./optimum-neuron

# TGI base env
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

# Disable color logs as they are not supported by CloudWatch
ENV LOGURU_COLORIZE=NO
ENV LOG_COLORIZE=0

# Install router (the v2 router binary is installed under the generic router name)
COPY --from=builder /usr/src/target/release/text-generation-router-v2 /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
# Install python server from the archive built in the pyserver stage
COPY --from=pyserver /pyserver/build/dist dist
RUN pip install --no-cache-dir dist/text_generation_server*.tar.gz
# Final image
# Adds the environment helper and the entrypoint script on top of the neuron stage.
FROM neuron

# Both files land at the filesystem root under their original names
COPY backends/neuron/tgi_env.py backends/neuron/tgi-entrypoint.sh /
# The entrypoint is invoked directly, so it must be executable
RUN chmod +x /tgi-entrypoint.sh

ENTRYPOINT ["/tgi-entrypoint.sh"]