# text-generation-inference/Dockerfile_llamacpp
FROM ubuntu:24.04 AS base
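# Base stage: Ubuntu 24.04 with a Python virtualenv and the transformers
# package; both the build stages and the final runtime image start from it.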
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
    python3-venv \
    python3-pip
RUN python3 -m venv /venv
ENV PATH="/venv/bin:$PATH"
RUN pip3 install --no-cache-dir transformers
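
# Build-dependencies stage: compiles llama.cpp and provides the Rust
# toolchain used to build the router.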
FROM base AS deps
WORKDIR /deps
RUN apt-get install -y \
    clang cmake git
# To build with CUDA support, add nvidia-cuda-toolkit to the apt-get install
# above and pass -DGGML_CUDA=ON to the cmake configure step below.
ENV LLAMA_VERSION=b4585
RUN git clone --depth 1 -b ${LLAMA_VERSION} https://github.com/ggerganov/llama.cpp \
    && cd llama.cpp \
    && cmake -B build \
        -DCMAKE_INSTALL_PREFIX=/usr \
        -DCMAKE_INSTALL_LIBDIR=/usr/lib \
        -DCMAKE_C_COMPILER=clang \
        -DCMAKE_CXX_COMPILER=clang++ \
        -DLLAMA_BUILD_COMMON=OFF \
        -DLLAMA_BUILD_TESTS=OFF \
        -DLLAMA_BUILD_EXAMPLES=OFF \
        -DLLAMA_BUILD_SERVER=OFF \
    && cmake --build build --config Release -j \
    && cmake --install build
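
# Optional: build mimalloc as an alternative allocator (kept commented out by default).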
# ENV MIMALLOC_VERSION=v3.0.1
# RUN git clone --depth 1 -b ${MIMALLOC_VERSION} https://github.com/microsoft/mimalloc \
#     && cd mimalloc \
#     && cmake -B build \
#         -DCMAKE_INSTALL_PREFIX=/usr \
#         -DCMAKE_INSTALL_LIBDIR=/usr/lib \
#         -DCMAKE_C_COMPILER=clang \
#         -DCMAKE_CXX_COMPILER=clang++ \
#     && cmake --build build --config Release -j \
#     && cmake --install build
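
# Tooling for fetching and building the Rust router: curl for rustup,
# pkg-config and libssl-dev for crates that link against OpenSSL.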
RUN apt-get install -y \
    curl pkg-config libssl-dev
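
# Install the Rust toolchain pinned by rust-toolchain.toml (rustup itself is
# installed with no default toolchain) plus cargo-chef for layer-cached
# dependency builds.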
WORKDIR /app
COPY rust-toolchain.toml rust-toolchain.toml
RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain none
ENV PATH="/root/.cargo/bin:$PATH"
RUN cargo install cargo-chef --locked
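
# Planner stage: compute the cargo-chef recipe describing the workspace
# dependencies.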
FROM deps AS planner
COPY . .
RUN cargo chef prepare --recipe-path recipe.json
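
# Builder stage: pre-build dependencies from the recipe (cached while the
# recipe is unchanged), then build the llama.cpp router itself.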
FROM deps AS builder
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook \
    --recipe-path recipe.json \
    --profile release-opt \
    --package text-generation-router-llamacpp
COPY . .
RUN cargo build \
    --profile release-opt \
    --package text-generation-router-llamacpp --frozen
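
# Runtime stage: only the llama.cpp shared libraries and the router binary
# (installed as text-generation-launcher) are copied onto the base image.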
FROM base AS runtime
COPY --from=deps /usr/lib/libllama.so /usr/lib/
COPY --from=deps /usr/lib/libggml*.so /usr/lib/
COPY --from=builder /app/target/release-opt/text-generation-router-llamacpp /bin/text-generation-launcher
ENTRYPOINT ["text-generation-launcher"]
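
# Example usage (illustrative; the image tag and model id are placeholders):
#   docker build -f Dockerfile_llamacpp -t tgi-llamacpp .
#   docker run tgi-llamacpp --model-id <org>/<model>
# Run the image with --help to see the launcher's full set of flags.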