misc(backend): let's add some more tooling

This commit is contained in:
Morgan Funtowicz 2024-12-19 11:28:23 +01:00
parent 7a1785fa9f
commit aa6a143ba7
3 changed files with 88 additions and 14 deletions

View File

@ -50,6 +50,49 @@ jobs:
install: true install: true
buildkitd-config: /tmp/buildkitd.toml buildkitd-config: /tmp/buildkitd.toml
- name: Login to internal Container Registry
uses: docker/login-action@v3
with:
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
registry: registry.internal.huggingface.tech
- name: Login to GitHub Container Registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# If pull request
- name: Extract metadata (tags, labels) for Docker
if: ${{ github.event_name == 'pull_request' }}
id: meta-pr
uses: docker/metadata-action@v5
with:
images: |
registry.internal.huggingface.tech/api-inference/community/text-generation-inference/tensorrt-llm
tags: |
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
# If main, release or tag
- name: Extract metadata (tags, labels) for Docker
if: ${{ github.event_name != 'pull_request' }}
id: meta
uses: docker/metadata-action@v4.3.0
with:
flavor: |
latest=auto
images: |
registry.internal.huggingface.tech/api-inference/community/text-generation-inference/tensorrt-llm
# ghcr.io/huggingface/text-generation-inference
tags: |
type=semver,pattern={{version}}${{ env.LABEL }}
type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
- name: Build and push Docker image - name: Build and push Docker image
id: build-and-push id: build-and-push
env: env:
@ -59,7 +102,7 @@ jobs:
context: . context: .
file: Dockerfile_trtllm file: Dockerfile_trtllm
target: ci-runtime target: ci-runtime
push: false push: true
load: true load: true
platforms: 'linux/amd64' platforms: 'linux/amd64'
build-args: | build-args: |
@ -78,5 +121,3 @@ jobs:

View File

@ -130,6 +130,7 @@ FROM runtime
LABEL co.huggingface.vendor="Hugging Face Inc." LABEL co.huggingface.vendor="Hugging Face Inc."
LABEL org.opencontainers.image.authors="hardware@hf.co" LABEL org.opencontainers.image.authors="hardware@hf.co"
LABEL org.opencontainers.title="Text-Generation-Inference TensorRT-LLM Backend"
ENTRYPOINT ["./text-generation-launcher"] ENTRYPOINT ["./text-generation-launcher"]
CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"] CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
@ -154,3 +155,9 @@ COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
# Basically we copy from target/debug instead of target/release # Basically we copy from target/debug instead of target/release
COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
FROM ci-runtime
LABEL co.huggingface.vendor="Hugging Face Inc."
LABEL org.opencontainers.image.authors="hardware@hf.co"
LABEL org.opencontainers.title="Text-Generation-Inference TensorRT-LLM Backend CI/CD"

View File

@ -47,6 +47,7 @@ include(cmake/trtllm.cmake)
if (CMAKE_BUILD_TYPE STREQUAL "Debug") if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(TGI_TRTLLM_BACKEND_DEBUG ON) set(TGI_TRTLLM_BACKEND_DEBUG ON)
add_compile_definitions(TGI_TRTLLM_BACKEND_DEBUG=1) add_compile_definitions(TGI_TRTLLM_BACKEND_DEBUG=1)
add_compile_definitions(SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_TRACE)
endif () endif ()
if (${TGI_TRTLLM_BACKEND_BUILD_USE_LLD}) if (${TGI_TRTLLM_BACKEND_BUILD_USE_LLD})
@ -54,13 +55,6 @@ if (${TGI_TRTLLM_BACKEND_BUILD_USE_LLD})
add_link_options("-fuse-ld=lld") add_link_options("-fuse-ld=lld")
endif () endif ()
# This attempt to detect if the compiler can emit warning if it can't apply return value optimization from a function
check_cxx_compiler_flag("-Wnrvo" COMPILER_SUPPORT_WARNING_ON_NVRO)
if (${COMPILER_SUPPORT_WARNING_ON_NVRO})
message(STATUS "Enabling non-NVRO detection")
target_compile_options(tgi_trtllm_backend_impl "-Werror -Wnvro")
endif ()
# Let's build TRTLLM as part of CMake # Let's build TRTLLM as part of CMake
add_subdirectory("${trtllm_SOURCE_DIR}/cpp" "${trtllm_SOURCE_DIR}/..") add_subdirectory("${trtllm_SOURCE_DIR}/cpp" "${trtllm_SOURCE_DIR}/..")
@ -97,10 +91,19 @@ if (${TGI_TRTLLM_BACKEND_BUILD_TESTS})
) )
FetchContent_MakeAvailable(Catch2) FetchContent_MakeAvailable(Catch2)
# This attempt to detect if the compiler can emit warning if it can't apply return value optimization from a function
check_cxx_compiler_flag("-Wnrvo" COMPILER_SUPPORT_WARNING_ON_NVRO)
if (${COMPILER_SUPPORT_WARNING_ON_NVRO})
message(STATUS "Enabling non-NVRO detection")
target_compile_options(tgi_trtllm_backend_impl "-Werror -Wnvro")
endif ()
cmake_path(GET TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH PARENT_PATH TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH) cmake_path(GET TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH PARENT_PATH TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH)
message(STATUS "Adding linking path: ${TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH}") message(STATUS "Adding linking path: ${TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH}")
add_executable(tgi_trtllm_backend_tests tests/test_hardware.cpp tests/test_backend.cpp) add_executable(tgi_trtllm_backend_tests tests/test_hardware.cpp tests/test_backend.cpp)
target_compile_options(tgi_trtllm_backend_tests PRIVATE -Werror)
target_link_directories(tgi_trtllm_backend_tests PRIVATE "${TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH}") target_link_directories(tgi_trtllm_backend_tests PRIVATE "${TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH}")
target_include_directories(tgi_trtllm_backend_tests PUBLIC "${trtllm_SOURCE_DIR}/cpp/include") target_include_directories(tgi_trtllm_backend_tests PUBLIC "${trtllm_SOURCE_DIR}/cpp/include")
target_include_directories(tgi_trtllm_backend_tests PUBLIC "csrc/") target_include_directories(tgi_trtllm_backend_tests PUBLIC "csrc/")
@ -109,9 +112,32 @@ if (${TGI_TRTLLM_BACKEND_BUILD_TESTS})
target_link_libraries(tgi_trtllm_backend_tests PRIVATE tensorrt_llm nvinfer_plugin_tensorrt_llm tensorrt_llm_nvrtc_wrapper) target_link_libraries(tgi_trtllm_backend_tests PRIVATE tensorrt_llm nvinfer_plugin_tensorrt_llm tensorrt_llm_nvrtc_wrapper)
if (CMAKE_BUILD_TYPE MATCHES "Debug") if (CMAKE_BUILD_TYPE MATCHES "Debug")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fsanitize=undefined") if (NOT "${ASan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize=undefined") target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=address)
target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined PUBLIC -fsanitize=address) endif ()
if (NOT "${UBSan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined)
endif ()
if (NOT "${MSan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=memory)
endif ()
# execute_process(COMMAND CMAKE_C_COMPILER --print-file-name=libasan.so OUTPUT_VARIABLE LIBASAN_PATH OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE LIBASAN_FOUND)
# execute_process(COMMAND CMAKE_C_COMPILER --print-file-name=libubsan.so OUTPUT_VARIABLE LIBUBSAN_PATH OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE LIBUBSAN_FOUND)
# if (LIBASAN_FOUND)
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
# target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=address)
# endif ()
# if (LIBUBSAN_FOUND)
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
# target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined)
# endif ()
endif () endif ()
if(CMAKE_BUILD_TYPE MATCHES "Debug") if(CMAKE_BUILD_TYPE MATCHES "Debug")