mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
misc(ci): let's try to build the Dockerfile for trtllm
This commit is contained in:
parent
2f61ec46d1
commit
0ff0c6a482
24
.github/workflows/build_trtllm.yaml
vendored
24
.github/workflows/build_trtllm.yaml
vendored
@ -43,19 +43,29 @@ jobs:
|
|||||||
aws-region: us-east-1
|
aws-region: us-east-1
|
||||||
role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
|
role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
|
||||||
|
|
||||||
- name: "Install OS dependencies"
|
- name: Initialize Docker Buildx
|
||||||
run: apt update && apt install -y cmake gcc g++ ninja-build openmpi-devel
|
uses: docker/setup-buildx-action@v3
|
||||||
- name: "Install sccache"
|
with:
|
||||||
run: cargo install sccache --locked
|
install: true
|
||||||
|
buildkitd-config: /tmp/buildkitd.toml
|
||||||
|
|
||||||
- name: "Build TensorRT-LLM Backend"
|
- name: Build and push Docker image
|
||||||
env:
|
id: build-and-push
|
||||||
|
uses: docker/build-push-action@v4
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
file: Dockerfile_trtllm
|
||||||
|
target: runtime
|
||||||
|
push: false
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
build-args: |
|
||||||
|
BUILD_TYPE=debug
|
||||||
SCCACHE_BUCKET: ${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
|
SCCACHE_BUCKET: ${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
|
||||||
SCCACHE_REGION: "us-east-1"
|
SCCACHE_REGION: "us-east-1"
|
||||||
SCCACHE_S3_USE_SSL: false
|
SCCACHE_S3_USE_SSL: false
|
||||||
SCCACHE_S3_KEY_PREFIX: "tgi+trtllm"
|
SCCACHE_S3_KEY_PREFIX: "tgi+trtllm"
|
||||||
RUSTC_WRAPPER: sccache
|
RUSTC_WRAPPER: sccache
|
||||||
run: cargo build --package text-generation-backends-trtllm --bin text-generation-backends-trtllm
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,13 +1,6 @@
|
|||||||
ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
|
ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
|
||||||
ARG OMPI_VERSION="4.1.7rc1"
|
ARG OMPI_VERSION="4.1.7rc1"
|
||||||
|
ARG BUILD_TYPE=release
|
||||||
# Build dependencies resolver stage
|
|
||||||
FROM lukemathwalker/cargo-chef:latest AS chef
|
|
||||||
WORKDIR /usr/src/text-generation-inference/backends/trtllm
|
|
||||||
|
|
||||||
FROM chef AS planner
|
|
||||||
COPY . .
|
|
||||||
RUN cargo chef prepare --recipe-path recipe.json
|
|
||||||
|
|
||||||
# CUDA dependent dependencies resolver stage
|
# CUDA dependent dependencies resolver stage
|
||||||
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
|
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
|
||||||
@ -62,18 +55,14 @@ FROM cuda-builder AS tgi-builder
|
|||||||
WORKDIR /usr/src/text-generation-inference
|
WORKDIR /usr/src/text-generation-inference
|
||||||
|
|
||||||
# Install Rust
|
# Install Rust
|
||||||
|
ENV PATH="/root/.cargo/bin:$PATH"
|
||||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y && \
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y && \
|
||||||
chmod -R a+w /root/.rustup && \
|
chmod -R a+w /root/.rustup && \
|
||||||
chmod -R a+w /root/.cargo
|
chmod -R a+w /root/.cargo && \
|
||||||
|
cargo install sccache --locked
|
||||||
ENV PATH="/root/.cargo/bin:$PATH"
|
|
||||||
RUN cargo install cargo-chef
|
|
||||||
|
|
||||||
# Cache dependencies
|
|
||||||
COPY --from=planner /usr/src/text-generation-inference/backends/trtllm/recipe.json .
|
|
||||||
RUN cargo chef cook --release --recipe-path recipe.json
|
|
||||||
|
|
||||||
# Build actual TGI
|
# Build actual TGI
|
||||||
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_ARCH_LIST
|
ARG CUDA_ARCH_LIST
|
||||||
ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
|
ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
|
||||||
ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
|
ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
|
||||||
@ -83,8 +72,7 @@ COPY . .
|
|||||||
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
|
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
|
||||||
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
|
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
|
||||||
RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
|
RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
|
||||||
cd backends/trtllm && \
|
CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX sccache build --${BUILD_TYPE} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm
|
||||||
CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release
|
|
||||||
|
|
||||||
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
|
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
|
||||||
RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
||||||
|
Loading…
Reference in New Issue
Block a user