Mirror of https://github.com/huggingface/text-generation-inference.git
Attempt to remove AWS S3 flaky cache for sccache (#2953)
* backend(trtllm): attempt to remove AWS S3 flaky cache for sccache
* backend(trtllm): what if we expose ENV instead of inline?
* backend(trtllm): and with the right env var for gha sccache
* backend(trtllm): relax the way to detect sccache
* backend(trtllm): make sccache definition manually
* backend(trtllm): ok let's try to define the launchers in build.rs when rustc_wrapper is present
* backend(trtllm): export env variable in run mb?
* backend(trtllm): Cache mode max to cache intermediate layers
* backend(trtllm): inject ompi_version build arg in dependent step
Parent: 6cb41a80a1
Commit: 40b00275b2
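The gist of the final approach (last few commit messages above): rather than wiring sccache's S3 backend by hand, reuse whatever RUSTC_WRAPPER Cargo is already running under and hand that same wrapper to CMake as the C/C++/CUDA compiler launcher. Below is a minimal Rust sketch of that build.rs pattern, assuming the cmake crate as a build dependency; the function name, the "csrc" source directory, and the SCCACHE_GHA_ENABLED handling are illustrative only, not the exact code from this commit.

// build.rs sketch: forward the Cargo-level compiler cache (RUSTC_WRAPPER, e.g.
// sccache) to the CMake-driven C++/CUDA build so both go through one cache.
use std::path::PathBuf;

fn configure_backend(out_dir: &PathBuf) -> cmake::Config {
    // "csrc" is a placeholder for the backend's CMake project directory.
    let mut config = cmake::Config::new("csrc");
    config.out_dir(out_dir);

    // Detect a GitHub-Actions-style sccache build from compile-time env vars,
    // mirroring the option_env!("SCCACHE_GHA_ENABLED") check in the diff below.
    let is_gha_build = option_env!("SCCACHE_GHA_ENABLED")
        .map(|v| matches!(v.to_lowercase().as_str(), "on" | "true" | "1"))
        .unwrap_or(false);
    println!("cargo:warning=GHA sccache build: {is_gha_build}");

    // If Cargo was launched with a RUSTC_WRAPPER, reuse it as the CMake
    // compiler launcher so C, C++ and CUDA objects hit the same cache.
    if let Some(wrapper) = option_env!("RUSTC_WRAPPER") {
        println!("cargo:warning=Using caching tool: {wrapper}");
        config.define("CMAKE_C_COMPILER_LAUNCHER", wrapper);
        config.define("CMAKE_CXX_COMPILER_LAUNCHER", wrapper);
        config.define("CMAKE_CUDA_COMPILER_LAUNCHER", wrapper);
    }

    config
}

fn main() {
    let out_dir = PathBuf::from(std::env::var("OUT_DIR").expect("OUT_DIR set by Cargo"));
    let _dst = configure_backend(&out_dir).build();
}

With that pattern, the Dockerfile only needs ENV RUSTC_WRAPPER=sccache plus the CMAKE_*_COMPILER_LAUNCHER exports, and the workflow just passes sccache_gha_enabled and the ACTIONS_* values through as build args, which is what the diff below does.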
.github/workflows/build.yaml (47 lines changed)
@@ -6,11 +6,11 @@ on:
hardware:
type: string
description: Hardware
# options:
# - cuda
# - cuda-trtllm
# - rocm
# - intel
# options:
# - cuda
# - cuda-trtllm
# - rocm
# - intel
required: true
release-tests:
description: "Run release integration tests"
@@ -41,19 +41,18 @@ jobs:
uses: actions/checkout@v4
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
- name: Inject required variables for sccache to interact with Github Actions Cache
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');

- name: Extract TensorRT-LLM version
run: |
echo "TENSORRT_LLM_VERSION=$(grep -oP '([a-z,0-9]{40})' $GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake)" >> $GITHUB_ENV
echo "TensorRT-LLM version: ${{ env.TENSORRT_LLM_VERSION }}"
- name: "Configure AWS Credentials"
id: aws-creds
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: us-east-1
role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
role-duration-seconds: 7200
output-credentials: true
- name: Construct harware variables
- name: Construct hardware variables
shell: bash
run: |
case ${{ inputs.hardware }} in
@@ -75,9 +74,6 @@ jobs:
export runs_on="ubuntu-latest"
export platform=""
export extra_pytest=""
export target="ci-runtime"
export sccache_s3_key_prefix="trtllm"
export sccache_region="us-east-1"
export build_type="dev"
;;
rocm)
@@ -128,8 +124,6 @@ jobs:
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
echo "TARGET=${target}" >> $GITHUB_ENV
echo "SCCACHE_S3_KEY_PREFIX=${sccache_s3_key_prefix}" >> $GITHUB_ENV
echo "SCCACHE_REGION=${sccache_region}" >> $GITHUB_ENV
echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
- name: Initialize Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -196,17 +190,14 @@ jobs:
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
PLATFORM=${{ env.PLATFORM }}
build_type=${{ env.BUILD_TYPE }}
is_gha_build=true
aws_access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }}
aws_secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }}
aws_session_token=${{ steps.aws-creds.outputs.aws-session-token }}
sccache_bucket=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
sccache_s3_key_prefix=${{ env.SCCACHE_S3_KEY_PREFIX }}
sccache_region=${{ env.SCCACHE_REGION }}
sccache_gha_enabled=on
actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}

tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
- name: Final
id: final
run: |

@@ -1,7 +1,9 @@
ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
ARG ompi_version="4.1.7"
ARG build_type=release
ARG is_gha_build=false
ARG ompi_version=4.1.7
ARG sccache_gha_enabled=no
ARG actions_cache_url=""
ARG actions_runtime_token=""

# CUDA dependent dependencies resolver stage
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
@@ -34,19 +36,19 @@ ENV TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt

# Install OpenMPI
FROM cuda-builder AS mpi-builder
ARG ompi_version

ENV OMPI_TARBALL_FILENAME="openmpi-$ompi_version.tar.bz2"
ADD --checksum=sha256:54a33cb7ad81ff0976f15a6cc8003c3922f0f3d8ceed14e1813ef3603f22cd34 \
https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILENAME \
/opt/src/mpi/

WORKDIR /opt/src/mpi
RUN tar --strip-components=1 -xf $OMPI_TARBALL_FILENAME &&\

ARG ompi_version
ENV OMPI_VERSION=${ompi_version}
ENV OMPI_TARBALL_FILENAME=openmpi-${OMPI_VERSION}.tar.bz2
ADD --checksum=sha256:54a33cb7ad81ff0976f15a6cc8003c3922f0f3d8ceed14e1813ef3603f22cd34 \
https://download.open-mpi.org/release/open-mpi/v4.1/${OMPI_TARBALL_FILENAME} .

RUN tar --strip-components=1 -xf ${OMPI_TARBALL_FILENAME} &&\
./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda --with-slurm && \
make -j all && \
make install && \
rm -rf "/opt/src/$OMPI_TARBALL_FILENAME"
rm -rf ${OMPI_TARBALL_FILENAME}/..

# Install TensorRT
FROM cuda-builder AS trt-builder
@@ -59,8 +61,11 @@ FROM cuda-builder AS tgi-builder
WORKDIR /usr/src/text-generation-inference

# Scoped global args reuse
ARG is_gha_build
ARG cuda_arch_list
ARG build_type
ARG sccache_gha_enabled
ARG actions_cache_url
ARG actions_runtime_token

# Install Rust
ENV PATH="/root/.cargo/bin:$PATH"
@@ -69,28 +74,17 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y &&
chmod -R a+w /root/.cargo && \
cargo install sccache --locked

# SCCACHE Specifics args - before finding a better, more generic, way...
ARG aws_access_key_id
ARG aws_secret_access_key
ARG aws_session_token
ARG sccache_bucket
ARG sccache_s3_key_prefix
ARG sccache_region

ENV AWS_ACCESS_KEY_ID=$aws_access_key_id
ENV AWS_SECRET_ACCESS_KEY=$aws_secret_access_key
ENV AWS_SESSION_TOKEN=$aws_session_token
ENV SCCACHE_BUCKET=$sccache_bucket
ENV SCCACHE_S3_KEY_PREFIX=$sccache_s3_key_prefix
ENV SCCACHE_REGION=$sccache_region

ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig"
ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt"

ENV USE_LLD_LINKER=ON
ENV CUDA_ARCH_LIST=${cuda_arch_list}
ENV IS_GHA_BUILD=${is_gha_build}

# SCCACHE Specifics args - before finding a better, more generic, way...
ENV SCCACHE_GHA_ENABLED=${sccache_gha_enabled}
ENV ACTIONS_CACHE_URL=${actions_cache_url}
ENV ACTIONS_RUNTIME_TOKEN=${actions_runtime_token}

COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
@@ -102,10 +96,12 @@ COPY launcher launcher
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi

RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
python3 backends/trtllm/scripts/setup_sccache.py --is-gha-build ${is_gha_build} && \
CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX \
RUSTC_WRAPPER=sccache \
ENV RUSTC_WRAPPER=sccache
ENV CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX
RUN export CMAKE_C_COMPILER_LAUNCHER=sccache && \
export CMAKE_CXX_COMPILER_LAUNCHER=sccache && \
export CMAKE_CUDA_COMPILER_LAUNCHER=sccache && \
mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
sccache --show-stats

@@ -1,20 +1,5 @@
cmake_minimum_required(VERSION 3.20)

if (NOT DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
find_program(CCACHE_EXECUTABLE "ccache")
if (CCACHE_EXECUTABLE)
message(STATUS "Using ccache")
set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_EXECUTABLE}")
set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_EXECUTABLE}")
set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_EXECUTABLE}")
endif ()
else ()
message(STATUS "Using user specified cmake cxx compiler launcher: ${CMAKE_CXX_COMPILER_LAUNCHER}")
set(CMAKE_C_COMPILER_LAUNCHER "${CMAKE_CXX_COMPILER_LAUNCHER}")
set(CMAKE_CXX_COMPILER_LAUNCHER "${CMAKE_CXX_COMPILER_LAUNCHER}")
set(CMAKE_CUDA_COMPILER_LAUNCHER "${CMAKE_CXX_COMPILER_LAUNCHER}")
endif ()

if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
endif ()

@@ -14,7 +14,7 @@ const TENSORRT_ROOT_DIR: Option<&str> = option_env!("TENSORRT_ROOT_DIR");
const NCCL_ROOT_DIR: Option<&str> = option_env!("NCCL_ROOT_DIR");

const IS_GHA_BUILD: LazyLock<bool> = LazyLock::new(|| {
option_env!("IS_GHA_BUILD").map_or(false, |value| match value.to_lowercase().as_str() {
option_env!("SCCACHE_GHA_ENABLED").map_or(false, |value| match value.to_lowercase().as_str() {
"on" => true,
"true" => true,
"1" => true,
@@ -138,10 +138,9 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf

if let Some(wrapper) = option_env!("RUSTC_WRAPPER") {
println!("cargo:warning=Using caching tool: {wrapper}");

env::set_var("CMAKE_C_COMPILER_LAUNCHER", wrapper);
env::set_var("CMAKE_CXX_COMPILER_LAUNCHER", wrapper);
env::set_var("CMAKE_CUDA_COMPILER_LAUNCHER", wrapper);
config.define("CMAKE_C_COMPILER_LAUNCHER", wrapper);
config.define("CMAKE_CXX_COMPILER_LAUNCHER", wrapper);
config.define("CMAKE_CUDA_COMPILER_LAUNCHER", wrapper);
}

// Allow to override which Python to use ...