diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 27907e39..73a55efe 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -6,11 +6,11 @@ on:
       hardware:
         type: string
         description: Hardware
-      # options:
-      # - cuda
-      # - cuda-trtllm
-      # - rocm
-      # - intel
+        # options:
+        # - cuda
+        # - cuda-trtllm
+        # - rocm
+        # - intel
         required: true
       release-tests:
         description: "Run release integration tests"
@@ -41,19 +41,18 @@ jobs:
         uses: actions/checkout@v4
       - name: Inject slug/short variables
         uses: rlespinasse/github-slug-action@v4.4.1
+      - name: Inject required variables for sccache to interact with Github Actions Cache
+        uses: actions/github-script@v7
+        with:
+          script: |
+            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
+            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+
       - name: Extract TensorRT-LLM version
         run: |
           echo "TENSORRT_LLM_VERSION=$(grep -oP '([a-z,0-9]{40})' $GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake)" >> $GITHUB_ENV
           echo "TensorRT-LLM version: ${{ env.TENSORRT_LLM_VERSION }}"
-      - name: "Configure AWS Credentials"
-        id: aws-creds
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-region: us-east-1
-          role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
-          role-duration-seconds: 7200
-          output-credentials: true
-      - name: Construct harware variables
+      - name: Construct hardware variables
         shell: bash
         run: |
           case ${{ inputs.hardware }} in
@@ -75,9 +74,6 @@ jobs:
               export runs_on="ubuntu-latest"
               export platform=""
               export extra_pytest=""
-              export target="ci-runtime"
-              export sccache_s3_key_prefix="trtllm"
-              export sccache_region="us-east-1"
               export build_type="dev"
               ;;
             rocm)
@@ -128,8 +124,6 @@ jobs:
           echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
           echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
           echo "TARGET=${target}" >> $GITHUB_ENV
-          echo "SCCACHE_S3_KEY_PREFIX=${sccache_s3_key_prefix}" >> $GITHUB_ENV
-          echo "SCCACHE_REGION=${sccache_region}" >> $GITHUB_ENV
           echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
       - name: Initialize Docker Buildx
         uses: docker/setup-buildx-action@v3
@@ -196,17 +190,14 @@ jobs:
             DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
             PLATFORM=${{ env.PLATFORM }}
             build_type=${{ env.BUILD_TYPE }}
-            is_gha_build=true
-            aws_access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }}
-            aws_secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }}
-            aws_session_token=${{ steps.aws-creds.outputs.aws-session-token }}
-            sccache_bucket=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
-            sccache_s3_key_prefix=${{ env.SCCACHE_S3_KEY_PREFIX }}
-            sccache_region=${{ env.SCCACHE_REGION }}
+            sccache_gha_enabled=on
+            actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
+            actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}
+
           tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
-          cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
-          cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
+          cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
+          cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
       - name: Final
         id: final
         run: |
diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm
index dd977a81..6538996a 100644
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -1,7 +1,9 @@
 ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
-ARG ompi_version="4.1.7"
 ARG build_type=release
-ARG is_gha_build=false
+ARG ompi_version=4.1.7
+ARG sccache_gha_enabled=no
+ARG actions_cache_url=""
+ARG actions_runtime_token=""

 # CUDA dependent dependencies resolver stage
 FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
@@ -34,19 +36,19 @@ ENV TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt

 # Install OpenMPI
 FROM cuda-builder AS mpi-builder
-ARG ompi_version
-
-ENV OMPI_TARBALL_FILENAME="openmpi-$ompi_version.tar.bz2"
-ADD --checksum=sha256:54a33cb7ad81ff0976f15a6cc8003c3922f0f3d8ceed14e1813ef3603f22cd34 \
-    https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILENAME \
-    /opt/src/mpi/
-
 WORKDIR /opt/src/mpi
-RUN tar --strip-components=1 -xf $OMPI_TARBALL_FILENAME &&\
+
+ARG ompi_version
+ENV OMPI_VERSION=${ompi_version}
+ENV OMPI_TARBALL_FILENAME=openmpi-${OMPI_VERSION}.tar.bz2
+ADD --checksum=sha256:54a33cb7ad81ff0976f15a6cc8003c3922f0f3d8ceed14e1813ef3603f22cd34 \
+    https://download.open-mpi.org/release/open-mpi/v4.1/${OMPI_TARBALL_FILENAME} .
+
+RUN tar --strip-components=1 -xf ${OMPI_TARBALL_FILENAME} &&\
     ./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda --with-slurm && \
     make -j all && \
     make install && \
-    rm -rf "/opt/src/$OMPI_TARBALL_FILENAME"
+    rm -rf ${OMPI_TARBALL_FILENAME}/..

 # Install TensorRT
 FROM cuda-builder AS trt-builder
@@ -59,8 +61,11 @@ FROM cuda-builder AS tgi-builder
 WORKDIR /usr/src/text-generation-inference

 # Scoped global args reuse
-ARG is_gha_build
+ARG cuda_arch_list
 ARG build_type
+ARG sccache_gha_enabled
+ARG actions_cache_url
+ARG actions_runtime_token

 # Install Rust
 ENV PATH="/root/.cargo/bin:$PATH"
@@ -69,28 +74,17 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y &&
     chmod -R a+w /root/.cargo && \
     cargo install sccache --locked

-# SCCACHE Specifics args - before finding a better, more generic, way...
-ARG aws_access_key_id
-ARG aws_secret_access_key
-ARG aws_session_token
-ARG sccache_bucket
-ARG sccache_s3_key_prefix
-ARG sccache_region
-
-ENV AWS_ACCESS_KEY_ID=$aws_access_key_id
-ENV AWS_SECRET_ACCESS_KEY=$aws_secret_access_key
-ENV AWS_SESSION_TOKEN=$aws_session_token
-ENV SCCACHE_BUCKET=$sccache_bucket
-ENV SCCACHE_S3_KEY_PREFIX=$sccache_s3_key_prefix
-ENV SCCACHE_REGION=$sccache_region
-
 ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
-ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
-ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
+ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig"
+ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt"

 ENV USE_LLD_LINKER=ON
 ENV CUDA_ARCH_LIST=${cuda_arch_list}
-ENV IS_GHA_BUILD=${is_gha_build}
+
+# SCCACHE Specifics args - before finding a better, more generic, way...
+ENV SCCACHE_GHA_ENABLED=${sccache_gha_enabled}
+ENV ACTIONS_CACHE_URL=${actions_cache_url}
+ENV ACTIONS_RUNTIME_TOKEN=${actions_runtime_token}

 COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
@@ -102,10 +96,12 @@ COPY launcher launcher

 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi

-RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    python3 backends/trtllm/scripts/setup_sccache.py --is-gha-build ${is_gha_build} && \
-    CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX \
-    RUSTC_WRAPPER=sccache \
+ENV RUSTC_WRAPPER=sccache
+ENV CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX
+RUN export CMAKE_C_COMPILER_LAUNCHER=sccache && \
+    export CMAKE_CXX_COMPILER_LAUNCHER=sccache && \
+    export CMAKE_CUDA_COMPILER_LAUNCHER=sccache && \
+    mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
     cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
     sccache --show-stats
diff --git a/backends/trtllm/CMakeLists.txt b/backends/trtllm/CMakeLists.txt
index 8388f113..26af80be 100644
--- a/backends/trtllm/CMakeLists.txt
+++ b/backends/trtllm/CMakeLists.txt
@@ -1,20 +1,5 @@
 cmake_minimum_required(VERSION 3.20)

-if (NOT DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
-    find_program(CCACHE_EXECUTABLE "ccache")
-    if (CCACHE_EXECUTABLE)
-        message(STATUS "Using ccache")
-        set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_EXECUTABLE}")
-        set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_EXECUTABLE}")
-        set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_EXECUTABLE}")
-    endif ()
-else ()
-    message(STATUS "Using user specified cmake cxx compiler launcher: ${CMAKE_CXX_COMPILER_LAUNCHER}")
-    set(CMAKE_C_COMPILER_LAUNCHER "${CMAKE_CXX_COMPILER_LAUNCHER}")
-    set(CMAKE_CXX_COMPILER_LAUNCHER "${CMAKE_CXX_COMPILER_LAUNCHER}")
-    set(CMAKE_CUDA_COMPILER_LAUNCHER "${CMAKE_CXX_COMPILER_LAUNCHER}")
-endif ()
-
 if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
     cmake_policy(SET CMP0135 NEW)
 endif ()
diff --git a/backends/trtllm/build.rs b/backends/trtllm/build.rs
index 8b041860..4d559fd4 100644
--- a/backends/trtllm/build.rs
+++ b/backends/trtllm/build.rs
@@ -14,7 +14,7 @@ const TENSORRT_ROOT_DIR: Option<&str> = option_env!("TENSORRT_ROOT_DIR");
 const NCCL_ROOT_DIR: Option<&str> = option_env!("NCCL_ROOT_DIR");

 const IS_GHA_BUILD: LazyLock<bool> = LazyLock::new(|| {
-    option_env!("IS_GHA_BUILD").map_or(false, |value| match value.to_lowercase().as_str() {
+    option_env!("SCCACHE_GHA_ENABLED").map_or(false, |value| match value.to_lowercase().as_str() {
         "on" => true,
         "true" => true,
         "1" => true,
@@ -138,10 +138,9 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf

     if let Some(wrapper) = option_env!("RUSTC_WRAPPER") {
         println!("cargo:warning=Using caching tool: {wrapper}");
-
-        env::set_var("CMAKE_C_COMPILER_LAUNCHER", wrapper);
-        env::set_var("CMAKE_CXX_COMPILER_LAUNCHER", wrapper);
-        env::set_var("CMAKE_CUDA_COMPILER_LAUNCHER", wrapper);
+        config.define("CMAKE_C_COMPILER_LAUNCHER", wrapper);
+        config.define("CMAKE_CXX_COMPILER_LAUNCHER", wrapper);
+        config.define("CMAKE_CUDA_COMPILER_LAUNCHER", wrapper);
     }

     // Allow to override which Python to use ...