Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 22:02:06 +00:00)
* misc(cmake) update dependencies
* feat(hardware) enable new hardware.hpp and unittests
* test(ctest) enable address sanitizer
* feat(backend): initial rewrite of the backend for simplicity
* feat(backend): remove all the logs from hardware.hpp
* feat(backend): added some logging
* feat(backend): enable compiler warning if support for RVO not applying
* feat(backend): missing return statement
* feat(backend): introduce backend_workspace_t to store precomputed information from the engine folder
* feat(backend): delete previous backend impl
* feat(backend): more impl
* feat(backend): use latest trtllm main version to have g++ >= 13 compatibility
* feat(backend): allow overriding which Python to use
* feat(backend): fix backend_exception_t -> backend_error_t naming
* feat(backend): impl missing generation_step_t as return value of pull_tokens
* feat(backend): make backend_workspace_t::engines_folder constexpr
* feat(backend): fix main.rs retrieving the tokenizer
* feat(backend): add guard to multiple header definitions
* test(backend): add more unittest
* feat(backend): remove constexpr from par
* feat(backend): remove constexpig
* test(backend): more test coverage
* chore(trtllm): update dependency towards 0.15.0
* effectively cancel the request on the executor
* feat(backend) fix moving backend when pulling
* feat(backend): make sure we can easily cancel request on the executor
* feat(backend): fix missing "0" field access
* misc(backend): fix reborrowing Pin<&mut T> as described in the doc https://doc.rust-lang.org/stable/std/pin/struct.Pin.html#method.as_mut
* chore: Add doc and CI for TRTLLM (#2799)
* doc: Formatting
* misc(backend): indent

Co-authored-by: Hugo Larcher <hugo.larcher@huggingface.co>
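# Resolve TensorRT, configure and fetch a pinned TensorRT-LLM, and record the
# locations of its prebuilt artifacts for the TGI TRTLLM backend. The first two
# lines map the TGI-provided TensorRT paths onto the TRT_INCLUDE_DIR /
# TRT_LIB_DIR names consumed by TensorRT-LLM's build.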
set(TRT_INCLUDE_DIR ${TGI_TRTLLM_BACKEND_TRT_INCLUDE_DIR})
set(TRT_LIB_DIR ${TGI_TRTLLM_BACKEND_TRT_LIB_DIR})

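# The options below are read by TensorRT-LLM's own CMake once it is brought in
# via FetchContent further down: build against the C++11 ABI and skip the
# PyTorch bindings, pybind module, benchmarks and tests the backend does not need.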
set(USE_CXX11_ABI ON)
set(BUILD_PYT OFF)
set(BUILD_PYBIND OFF)
set(BUILD_MICRO_BENCHMARKS OFF)
set(BUILD_BENCHMARKS OFF)
set(BUILD_TESTS OFF)
set(CMAKE_CUDA_ARCHITECTURES ${TGI_TRTLLM_BACKEND_TARGET_CUDA_ARCH_LIST})

message(STATUS "Building for CUDA Architectures: ${CMAKE_CUDA_ARCHITECTURES}")

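# CMAKE_CUDA_ARCHITECTURES above takes a semicolon-separated list of compute
# capabilities; a hypothetical invocation pinning Hopper would pass e.g.
#   -DTGI_TRTLLM_BACKEND_TARGET_CUDA_ARCH_LIST="90-real"
# at configure time (the value is illustrative, not prescriptive). The
# Debug/Release split below trades build time for runtime features: Debug keeps
# FAST_BUILD on and NVTX ranges enabled for profiling, Release builds all
# kernels with FAST_MATH and strips NVTX.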
set(ENABLE_UCX OFF)
if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
    set(FAST_BUILD ON)
    set(NVTX_DISABLE OFF)
else ()
    set(FAST_BUILD OFF)
    set(FAST_MATH ON)
    set(NVTX_DISABLE ON)
endif ()

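# Only the Python interpreter component is required: parts of TensorRT-LLM's
# build are driven by Python scripts. The interpreter that gets picked up can be
# overridden through the standard FindPython3 hint, e.g.
#   -DPython3_EXECUTABLE=/path/to/python3
# (matching the "allow overriding which Python to use" change above).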
find_package(Python3 REQUIRED Interpreter)

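# Pull Hugging Face's TensorRT-LLM fork pinned to an exact commit so backend
# builds are reproducible; GIT_SHALLOW keeps the clone small.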
include(FetchContent)
fetchcontent_declare(
        trtllm
        GIT_REPOSITORY https://github.com/huggingface/TensorRT-LLM.git
        GIT_TAG 1bb9ca4688805444f203647674bac1d7219d0579
        GIT_SHALLOW ON
        DOWNLOAD_EXTRACT_TIMESTAMP
)
fetchcontent_makeavailable(trtllm)

message(STATUS "Found TensorRT-LLM: ${trtllm_SOURCE_DIR}")
execute_process(COMMAND git lfs install WORKING_DIRECTORY "${trtllm_SOURCE_DIR}/")
execute_process(COMMAND git lfs pull WORKING_DIRECTORY "${trtllm_SOURCE_DIR}/")

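# At this point the checkout may only contain git-lfs pointer files; `git lfs
# pull` materializes the large prebuilt binaries, which presumably include the
# NVRTC wrapper and executor static library whose paths are computed below.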
# TensorRT-LLM ships a *precompiled* NVRTC wrapper library that it uses to JIT some specific kernels; we compute its path here
set(TRTLLM_NVRTC_LIBRARY_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}tensorrt_llm_nvrtc_wrapper${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE INTERNAL "nvrtc wrapper library name")
set(TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH "${trtllm_SOURCE_DIR}/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/${CMAKE_LIBRARY_ARCHITECTURE}/${TRTLLM_NVRTC_LIBRARY_NAME}"
        CACHE INTERNAL "nvrtc wrapper library path")

# Likewise for the precompiled Executor static library
set(TRTLLM_EXECUTOR_STATIC_LIBRARY_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}tensorrt_llm_executor_static${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE INTERNAL "executor_static library name")
set(TRTLLM_EXECUTOR_STATIC_LIBRARY_PATH "${trtllm_SOURCE_DIR}/cpp/tensorrt_llm/executor/${CMAKE_LIBRARY_ARCHITECTURE}/${TRTLLM_EXECUTOR_STATIC_LIBRARY_NAME}" CACHE INTERNAL "executor_static library path")
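# Both paths are stored as INTERNAL cache entries so that the including
# CMakeLists.txt (an assumption about the parent build) can link the executor
# static library and ship the NVRTC wrapper next to the backend binary.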