# Mirror of https://github.com/huggingface/text-generation-inference.git
# (synced 2025-04-22 15:32:08 +00:00) -- Dockerfile, 101 lines, 3.6 KiB
# Build dependencies resolver stage.
# Base for the `planner` and `builder` stages: supplies cargo-chef and sets the
# working directory both of them inherit.
# NOTE(review): `:latest` is unpinned, so rebuilds may pick up a different
# cargo-chef/Rust image; consider pinning a tag that matches rust-toolchain.toml.
FROM lukemathwalker/cargo-chef:latest AS chef
WORKDIR /usr/src/text-generation-inference/

# Planner stage: copy the workspace manifests and crate directories, then let
# cargo-chef write recipe.json, which the builder stage copies out of here.
FROM chef AS planner
COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY backends backends
COPY benchmark benchmark
COPY clients clients
COPY launcher launcher
COPY router router

RUN cargo chef prepare --recipe-path recipe.json

# Builder stage: install the native toolchain and development libraries used
# by the llama.cpp backend build further down in this stage.
FROM chef AS builder

# Install prefix for CMake-driven builds; the final stage copies this `dist`
# directory into /usr/.
ENV CMAKE_INSTALL_PREFIX=/usr/src/text-generation-inference/dist

# The apt cache and state directories are BuildKit cache mounts, so downloaded
# packages and lists are reused across builds without landing in a layer.
# `apt-get` is used instead of `apt`, whose CLI is not meant for scripts.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        clang \
        cmake \
        gcc g++ \
        libc++-dev \
        libnuma-dev \
        libopenmpi-dev \
        libssl-dev \
        ninja-build \
        openssl \
        python3-dev

# Register clang as the system `cc` and clang++ as the system `c++`, then print
# the resulting selection and compiler versions into the build log.
# `c++` must point at the C++ driver (clang++): the plain `clang` driver does
# not link the C++ standard library by default, so C++ link steps would fail.
# NOTE(review): `--auto` selects the highest-priority alternative; confirm that
# priority 10 outranks any cc/c++ alternatives already registered in the image.
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/clang 10 \
    && update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 10 \
    && update-alternatives --auto cc \
    && update-alternatives --auto c++ \
    && update-alternatives --display cc \
    && update-alternatives --display c++ \
    && cc --version \
    && c++ --version

# Pre-build the dependencies from the planner's recipe before any project
# source is copied in, so this layer stays cached while only sources change.
COPY --from=planner /usr/src/text-generation-inference/recipe.json recipe.json
RUN cargo chef cook --profile release-opt --package text-generation-backend-llamacpp --bin text-generation-backend-llamacpp --recipe-path recipe.json

# Copy the workspace sources and build the llama.cpp backend binary.
COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY backends backends
COPY benchmark benchmark
COPY launcher launcher
COPY router router

# Add /usr/lib to the library search path passed to rustc.
ENV RUSTFLAGS="-L/usr/lib"
# Install prefix for CMake-driven builds; the final stage copies this `dist`
# directory into /usr/.
ENV CMAKE_INSTALL_PREFIX=/usr/src/text-generation-inference/dist
# `--frozen` makes the build fail rather than modify Cargo.lock or hit the network.
RUN cargo build --profile release-opt --package text-generation-backend-llamacpp --bin text-generation-backend-llamacpp --frozen

# mimalloc stage: download, build and install the mimalloc allocator release
# selected by MIMALLOC_VERSION. The final stage copies the shared library from
# /usr/local/lib in this stage.
FROM ubuntu:22.04 AS mimalloc-builder
ENV DEBIAN_FRONTEND=noninteractive
ENV MIMALLOC_VERSION=2.1.7

# `apt-get` replaces `apt` (whose CLI is not meant for scripts);
# `--with-new-pkgs` keeps the `apt upgrade` behaviour of allowing upgrades that
# pull in new packages.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && \
    apt-get upgrade -y --with-new-pkgs && \
    apt-get install -y \
        clang \
        cmake \
        ninja-build \
        wget

# Fetch the tagged source archive, then configure (Ninja, Release), build and
# install it, all in one layer.
RUN wget https://github.com/microsoft/mimalloc/archive/refs/tags/v${MIMALLOC_VERSION}.tar.gz -O mimalloc-${MIMALLOC_VERSION}.tar.gz && \
    tar -xzf mimalloc-${MIMALLOC_VERSION}.tar.gz && \
    cd mimalloc-${MIMALLOC_VERSION} && \
    cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -B build . && \
    cmake --build build --parallel && \
    cmake --install build

# Final runtime stage: Ubuntu base plus the shared libraries and Python 3.11
# packages installed for the backend at run time.
FROM ubuntu:22.04
ENV DEBIAN_FRONTEND=noninteractive

# `apt-get` replaces `apt` (whose CLI is not meant for scripts);
# `--with-new-pkgs` keeps the `apt upgrade` behaviour of allowing upgrades that
# pull in new packages. The apt directories are BuildKit cache mounts, so they
# do not end up in the image layer.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && \
    apt-get upgrade -y --with-new-pkgs && \
    apt-get install -y \
        libopenmpi3 \
        numactl \
        openssl \
        python3.11-dev \
        python3.11-venv

# Pull the build artifacts out of the earlier stages:
#  - the llama.cpp backend binary, installed under the name text-generation-launcher
#  - everything placed under the CMake install prefix (`dist`), merged into /usr/
#  - the backend's Python requirements file
#  - the mimalloc shared library built in the mimalloc-builder stage
COPY --from=builder /usr/src/text-generation-inference/target/release-opt/text-generation-backend-llamacpp /usr/src/text-generation-inference/text-generation-launcher
COPY --from=builder /usr/src/text-generation-inference/dist /usr/
COPY --from=builder /usr/src/text-generation-inference/backends/llamacpp/requirements.txt requirements.txt
COPY --from=mimalloc-builder /usr/local/lib/libmimalloc.so.2.1 /usr/lib/libmimalloc.so.2.1

# Create a Python 3.11 virtual environment and put it first on PATH so that
# `pip3`/`python` resolve to the venv from here on.
RUN /usr/bin/python3.11 -m venv /usr/src/text-generation-inference/venv
ENV PATH="/usr/src/text-generation-inference/venv/bin:$PATH"
# WORKDIR has not been set yet in this stage, so the relative requirements.txt
# path resolves against the stage's current directory.
RUN pip3 install --no-cache-dir -r requirements.txt

ENV PORT=8080
WORKDIR /usr/src/text-generation-inference

# Exec-form ENTRYPOINT performs no shell processing, so a leading
# "LD_PRELOAD=..." element would be treated as the executable name and the
# container would fail to start. `env` sets the variable and then exec()s the
# launcher, which keeps the launcher as the container's main process.
# The launcher is referenced by absolute path because its directory is not on
# PATH (only the venv's bin directory was added).
ENTRYPOINT ["/usr/bin/env", "LD_PRELOAD=/usr/lib/libmimalloc.so.2.1", "/usr/src/text-generation-inference/text-generation-launcher"]