mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
misc(backend): let's add some more tooling
This commit is contained in:
parent
a5e3e6ac24
commit
59118548a0
47
.github/workflows/build_trtllm.yaml
vendored
47
.github/workflows/build_trtllm.yaml
vendored
@ -50,6 +50,49 @@ jobs:
|
||||
install: true
|
||||
buildkitd-config: /tmp/buildkitd.toml
|
||||
|
||||
- name: Login to internal Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.REGISTRY_PASSWORD }}
|
||||
registry: registry.internal.huggingface.tech
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# If pull request
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
id: meta-pr
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
registry.internal.huggingface.tech/api-inference/community/text-generation-inference/tensorrt-llm
|
||||
tags: |
|
||||
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
|
||||
|
||||
# If main, release or tag
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
id: meta
|
||||
# Same major version as the pull-request metadata step in this job.
uses: docker/metadata-action@v5
|
||||
with:
|
||||
flavor: |
|
||||
latest=auto
|
||||
images: |
|
||||
registry.internal.huggingface.tech/api-inference/community/text-generation-inference/tensorrt-llm
|
||||
# ghcr.io/huggingface/text-generation-inference
|
||||
tags: |
|
||||
type=semver,pattern={{version}}${{ env.LABEL }}
|
||||
type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
|
||||
type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
|
||||
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
|
||||
|
||||
- name: Build and push Docker image
|
||||
id: build-and-push
|
||||
env:
|
||||
@ -59,7 +102,7 @@ jobs:
|
||||
context: .
|
||||
file: Dockerfile_trtllm
|
||||
target: ci-runtime
|
||||
push: false
|
||||
push: true
|
||||
load: true
|
||||
platforms: 'linux/amd64'
|
||||
build-args: |
|
||||
@ -78,5 +121,3 @@ jobs:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -115,6 +115,7 @@ FROM runtime
|
||||
|
||||
LABEL co.huggingface.vendor="Hugging Face Inc."
|
||||
LABEL org.opencontainers.image.authors="hardware@hf.co"
|
||||
# OCI pre-defined annotation keys live under the org.opencontainers.image.* namespace.
LABEL org.opencontainers.image.title="Text-Generation-Inference TensorRT-LLM Backend"
|
||||
|
||||
ENTRYPOINT ["./text-generation-launcher"]
|
||||
CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
|
||||
@ -138,4 +139,10 @@ COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
|
||||
COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
|
||||
|
||||
# Basically we copy from target/debug instead of target/release
|
||||
COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
|
||||
COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
|
||||
|
||||
FROM ci-runtime
|
||||
|
||||
LABEL co.huggingface.vendor="Hugging Face Inc."
|
||||
LABEL org.opencontainers.image.authors="hardware@hf.co"
|
||||
# OCI pre-defined annotation keys live under the org.opencontainers.image.* namespace.
LABEL org.opencontainers.image.title="Text-Generation-Inference TensorRT-LLM Backend CI/CD"
|
@ -47,6 +47,7 @@ include(cmake/trtllm.cmake)
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(TGI_TRTLLM_BACKEND_DEBUG ON)
|
||||
add_compile_definitions(TGI_TRTLLM_BACKEND_DEBUG=1)
|
||||
# Use the NAME=VALUE form: space-separated arguments would define two unrelated macros
# instead of setting the active spdlog level to TRACE.
add_compile_definitions(SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
|
||||
endif ()
|
||||
|
||||
if (${TGI_TRTLLM_BACKEND_BUILD_USE_LLD})
|
||||
@ -54,13 +55,6 @@ if (${TGI_TRTLLM_BACKEND_BUILD_USE_LLD})
|
||||
add_link_options("-fuse-ld=lld")
|
||||
endif ()
|
||||
|
||||
# This attempts to detect whether the compiler can emit a warning when it cannot apply named return value optimization (NRVO) to a function
|
||||
check_cxx_compiler_flag("-Wnrvo" COMPILER_SUPPORT_WARNING_ON_NVRO)
|
||||
if (${COMPILER_SUPPORT_WARNING_ON_NVRO})
|
||||
message(STATUS "Enabling non-NVRO detection")
|
||||
# A scope keyword is mandatory, each flag must be its own argument, and the warning
# is spelled -Wnrvo (the same flag probed by check_cxx_compiler_flag).
target_compile_options(tgi_trtllm_backend_impl PRIVATE -Werror -Wnrvo)
|
||||
endif ()
|
||||
|
||||
# Let's build TRTLLM as part of CMake
|
||||
add_subdirectory("${trtllm_SOURCE_DIR}/cpp" "${trtllm_SOURCE_DIR}/..")
|
||||
|
||||
@ -97,10 +91,19 @@ if (${TGI_TRTLLM_BACKEND_BUILD_TESTS})
|
||||
)
|
||||
FetchContent_MakeAvailable(Catch2)
|
||||
|
||||
# This attempts to detect whether the compiler can emit a warning when it cannot apply named return value optimization (NRVO) to a function
|
||||
check_cxx_compiler_flag("-Wnrvo" COMPILER_SUPPORT_WARNING_ON_NVRO)
|
||||
if (${COMPILER_SUPPORT_WARNING_ON_NVRO})
|
||||
message(STATUS "Enabling non-NVRO detection")
|
||||
# A scope keyword is mandatory, each flag must be its own argument, and the warning
# is spelled -Wnrvo (the same flag probed by check_cxx_compiler_flag).
target_compile_options(tgi_trtllm_backend_impl PRIVATE -Werror -Wnrvo)
|
||||
endif ()
|
||||
|
||||
cmake_path(GET TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH PARENT_PATH TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH)
|
||||
message(STATUS "Adding linking path: ${TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH}")
|
||||
|
||||
add_executable(tgi_trtllm_backend_tests tests/test_hardware.cpp tests/test_backend.cpp)
|
||||
|
||||
target_compile_options(tgi_trtllm_backend_tests PRIVATE -Werror)
|
||||
target_link_directories(tgi_trtllm_backend_tests PRIVATE "${TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH}")
|
||||
target_include_directories(tgi_trtllm_backend_tests PUBLIC "${trtllm_SOURCE_DIR}/cpp/include")
|
||||
target_include_directories(tgi_trtllm_backend_tests PUBLIC "csrc/")
|
||||
@ -109,9 +112,32 @@ if (${TGI_TRTLLM_BACKEND_BUILD_TESTS})
|
||||
target_link_libraries(tgi_trtllm_backend_tests PRIVATE tensorrt_llm nvinfer_plugin_tensorrt_llm tensorrt_llm_nvrtc_wrapper)
|
||||
|
||||
if (CMAKE_BUILD_TYPE MATCHES "Debug")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fsanitize=undefined")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize=undefined")
|
||||
target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined PUBLIC -fsanitize=address)
|
||||
if (NOT "${ASan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
|
||||
target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=address)
|
||||
endif ()
|
||||
|
||||
if (NOT "${UBSan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
|
||||
target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined)
|
||||
endif ()
|
||||
|
||||
if (NOT "${MSan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
|
||||
target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=memory)
|
||||
endif ()
|
||||
|
||||
# execute_process(COMMAND CMAKE_C_COMPILER --print-file-name=libasan.so OUTPUT_VARIABLE LIBASAN_PATH OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE LIBASAN_FOUND)
|
||||
# execute_process(COMMAND CMAKE_C_COMPILER --print-file-name=libubsan.so OUTPUT_VARIABLE LIBUBSAN_PATH OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE LIBUBSAN_FOUND)
|
||||
|
||||
# if (LIBASAN_FOUND)
|
||||
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
|
||||
# target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=address)
|
||||
# endif ()
|
||||
|
||||
# if (LIBUBSAN_FOUND)
|
||||
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
|
||||
# target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined)
|
||||
# endif ()
|
||||
endif ()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras)
|
||||
|
@ -103,6 +103,10 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
|
||||
)
|
||||
.define("TGI_TRTLLM_BACKEND_TRT_ROOT", tensorrt_path);
|
||||
|
||||
if is_debug {
|
||||
config.define("TGI_TRTLLM_BACKEND_BUILD_TESTS", "ON");
|
||||
}
|
||||
|
||||
if option_env!("USE_LLD_LINKER").is_some() {
|
||||
println!("cargo:warning=Using lld linker");
|
||||
config.define("TGI_TRTLLM_BACKEND_BUILD_USE_LLD", "ON");
|
||||
|
Loading…
Reference in New Issue
Block a user