mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
Signed-off-by: yuanwu <yuan.wu@intel.com> Co-authored-by: Thanaji Rao Thakkalapelli <tthakkalapelli@habana.ai>
80 lines
2.5 KiB
Docker
80 lines
2.5 KiB
Docker
# Rust builder
|
|
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
|
|
WORKDIR /usr/src
|
|
|
|
FROM chef as planner
|
|
COPY Cargo.toml Cargo.toml
|
|
COPY rust-toolchain.toml rust-toolchain.toml
|
|
COPY proto proto
|
|
COPY benchmark benchmark
|
|
COPY router router
|
|
COPY launcher launcher
|
|
RUN cargo chef prepare --recipe-path recipe.json
|
|
|
|
FROM chef AS builder
|
|
|
|
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
|
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
|
|
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
|
|
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
|
|
rm -f $PROTOC_ZIP
|
|
|
|
COPY --from=planner /usr/src/recipe.json recipe.json
|
|
COPY Cargo.lock Cargo.lock
|
|
RUN cargo chef cook --release --recipe-path recipe.json
|
|
|
|
COPY Cargo.toml Cargo.toml
|
|
COPY rust-toolchain.toml rust-toolchain.toml
|
|
COPY proto proto
|
|
COPY benchmark benchmark
|
|
COPY router router
|
|
COPY launcher launcher
|
|
RUN cargo build --release
|
|
|
|
# Text Generation Inference base image
|
|
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest as base
|
|
|
|
# Text Generation Inference base env
|
|
ENV HUGGINGFACE_HUB_CACHE=/data \
|
|
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
|
PORT=80
|
|
|
|
# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it
|
|
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
|
|
dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb
|
|
|
|
WORKDIR /usr/src
|
|
|
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
|
libssl-dev \
|
|
ca-certificates \
|
|
make \
|
|
curl \
|
|
git \
|
|
&& rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install server
|
|
COPY proto proto
|
|
COPY server server
|
|
COPY server/Makefile server/Makefile
|
|
RUN cd server && \
|
|
make gen-server && \
|
|
pip install -r requirements.txt && \
|
|
bash ./dill-0.3.8-patch.sh && \
|
|
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 && \
|
|
BUILD_CUDA_EXT=0 pip install git+https://github.com/AutoGPTQ/AutoGPTQ.git@097dd04e --no-build-isolation && \
|
|
pip install . --no-cache-dir
|
|
|
|
# Install benchmarker
|
|
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
|
# Install router
|
|
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
|
|
# Install launcher
|
|
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
|
|
|
|
# Final image
|
|
FROM base
|
|
|
|
ENTRYPOINT ["text-generation-launcher"]
|
|
CMD ["--json-output"]
|