text-generation-inference/Dockerfile.llamacpp

101 lines
3.6 KiB
Docker

# Build dependencies resolver stage: base image that provides `cargo chef`,
# reused by the `planner` and `builder` stages via `FROM chef`.
# NOTE(review): `:latest` is a floating tag, so rebuilds are not reproducible —
# consider pinning a specific cargo-chef tag or digest once the required
# toolchain version is confirmed.
FROM lukemathwalker/cargo-chef:latest AS chef
# All later relative paths in stages derived from `chef` resolve against this directory.
WORKDIR /usr/src/text-generation-inference/
# Planner stage: bring in the workspace manifests and crate directories, then
# have cargo-chef write the dependency recipe (recipe.json) that the builder
# stage copies out of this stage.
FROM chef AS planner

# Workspace-level manifests and toolchain pin, copied into the WORKDIR.
COPY Cargo.lock Cargo.toml rust-toolchain.toml ./

# Crate directories, each copied to a directory of the same name.
COPY backends/ backends/
COPY benchmark/ benchmark/
COPY clients/ clients/
COPY launcher/ launcher/
COPY router/ router/

RUN cargo chef prepare --recipe-path recipe.json
# Builder stage: installs the native toolchain, pre-builds the dependency graph
# from the planner's recipe, then compiles the llama.cpp backend binary.
FROM chef AS builder

# Install prefix for native artifacts; the runtime stage copies this directory
# into /usr/. Presumably consumed by the backend's CMake-based build script —
# verify against backends/llamacpp.
ENV CMAKE_INSTALL_PREFIX=/usr/src/text-generation-inference/dist

# `apt-get` is used instead of `apt`, whose CLI is not stable for scripted use.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        clang \
        cmake \
        g++ \
        gcc \
        libc++-dev \
        libnuma-dev \
        libopenmpi-dev \
        libssl-dev \
        ninja-build \
        openssl \
        python3-dev

# Make clang the default C compiler and clang++ the default C++ compiler.
# `c++` must point at clang++ (not clang): the plain C driver does not link the
# C++ standard library when used for C++ link steps.
# The --display / --version calls only log the resulting selection.
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/clang 10 \
    && update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 10 \
    && update-alternatives --auto cc \
    && update-alternatives --auto c++ \
    && update-alternatives --display cc \
    && update-alternatives --display c++ \
    && cc --version \
    && c++ --version

# Build dependencies first, from the recipe only, so this layer stays cached
# until the recipe changes.
COPY --from=planner /usr/src/text-generation-inference/recipe.json recipe.json
RUN cargo chef cook --profile release-opt --package text-generation-backend-llamacpp --bin text-generation-backend-llamacpp --recipe-path recipe.json

# Now copy the real sources and build the backend binary.
COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY backends backends
COPY benchmark benchmark
COPY launcher launcher
COPY router router

# Adds /usr/lib to the linker search path for the final cargo build.
ENV RUSTFLAGS="-L/usr/lib"

# --frozen: fail instead of touching the network or rewriting Cargo.lock.
RUN cargo build --profile release-opt --package text-generation-backend-llamacpp --bin text-generation-backend-llamacpp --frozen
# mimalloc builder stage: downloads the tagged mimalloc release and builds and
# installs it with CMake/Ninja; the runtime stage copies the shared library
# from /usr/local/lib.
FROM ubuntu:22.04 AS mimalloc-builder

# Build-time only: keeps apt non-interactive for the RUN steps of this stage.
ARG DEBIAN_FRONTEND=noninteractive

# NOTE: the runtime stage copies libmimalloc.so.2.1 by name, so changing the
# major/minor version here requires updating that COPY as well.
ENV MIMALLOC_VERSION=2.1.7

# `apt-get` replaces `apt` (whose CLI is not stable for scripts);
# --with-new-pkgs keeps the upgrade semantics of `apt upgrade`.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && \
    apt-get upgrade -y --with-new-pkgs && \
    apt-get install -y \
        clang \
        cmake \
        ninja-build \
        wget

# NOTE(review): the tarball is fetched without checksum verification — consider
# pinning a sha256 for the release archive.
RUN wget https://github.com/microsoft/mimalloc/archive/refs/tags/v${MIMALLOC_VERSION}.tar.gz -O mimalloc-${MIMALLOC_VERSION}.tar.gz && \
    tar -xzf mimalloc-${MIMALLOC_VERSION}.tar.gz && \
    cd mimalloc-${MIMALLOC_VERSION} && \
    cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -B build . && \
    cmake --build build --parallel && \
    cmake --install build
# Runtime stage: Ubuntu image carrying the backend binary, the native artifacts
# installed under dist/, the mimalloc shared library, and a Python 3.11 venv
# with the backend's Python requirements.
FROM ubuntu:22.04

# Build-time only: keeps apt non-interactive during the build without leaving
# DEBIAN_FRONTEND set in the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# `apt-get` replaces `apt` (whose CLI is not stable for scripts);
# --with-new-pkgs keeps the upgrade semantics of `apt upgrade`.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && \
    apt-get upgrade -y --with-new-pkgs && \
    apt-get install -y \
        libopenmpi3 \
        numactl \
        openssl \
        python3.11-dev \
        python3.11-venv

# The backend binary is shipped under the name `text-generation-launcher`.
COPY --from=builder /usr/src/text-generation-inference/target/release-opt/text-generation-backend-llamacpp /usr/src/text-generation-inference/text-generation-launcher
# Native artifacts installed under the builder's CMAKE_INSTALL_PREFIX.
COPY --from=builder /usr/src/text-generation-inference/dist /usr/
# No WORKDIR is set yet in this stage, so this lands at /requirements.txt,
# which is also where the pip step below runs.
COPY --from=builder /usr/src/text-generation-inference/backends/llamacpp/requirements.txt requirements.txt
COPY --from=mimalloc-builder /usr/local/lib/libmimalloc.so.2.1 /usr/lib/libmimalloc.so.2.1

# Create the virtualenv and put it first on PATH so `pip3`/`python` resolve to it.
RUN /usr/bin/python3.11 -m venv /usr/src/text-generation-inference/venv
ENV PATH="/usr/src/text-generation-inference/venv/bin:$PATH"
RUN pip3 install --no-cache-dir -r requirements.txt

ENV PORT=8080
WORKDIR /usr/src/text-generation-inference

# Exec-form ENTRYPOINT does no shell processing, so a leading "LD_PRELOAD=..."
# element would be taken as the executable name and the container would fail
# to start. `env` sets the variable and then exec()s the binary, so mimalloc is
# preloaded only for the server process (and its children) and the server still
# receives signals directly. The binary is referenced by absolute path because
# its directory is not on PATH.
ENTRYPOINT ["/usr/bin/env", "LD_PRELOAD=/usr/lib/libmimalloc.so.2.1", "/usr/src/text-generation-inference/text-generation-launcher"]