diff --git a/backends/trtllm/include/backend.h b/backends/trtllm/include/backend.h
index f98da2a5..dee83e22 100644
--- a/backends/trtllm/include/backend.h
+++ b/backends/trtllm/include/backend.h
@@ -44,23 +44,7 @@ namespace huggingface::tgi::backends {
 
     /**
      * Initialize logging mechanism
     */
-    void InitializeLogging() {
-#ifdef NDEBUG
-        if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
-            std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
-            std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
-                return std::tolower(c);
-            });
-
-            if (log_level == "debug")
-                spdlog::set_level(spdlog::level::debug);
-            else
-                spdlog::set_level(spdlog::level::info);
-        }
-#else
-        spdlog::set_level(spdlog::level::debug);
-#endif
-    }
+    void InitializeLogging();
 
     /**
diff --git a/backends/trtllm/include/hardware.h b/backends/trtllm/include/hardware.h
index 584dd974..9633495f 100644
--- a/backends/trtllm/include/hardware.h
+++ b/backends/trtllm/include/hardware.h
@@ -23,9 +23,9 @@ namespace huggingface::hardware::cuda {
         int32_t major;
         int32_t minor;
 
-        [[nodiscard]] constexpr bool isPostAmpere() const { return major >= AMPERE_SM_MAJOR; }
+        [[nodiscard]] constexpr bool IsPostAmpere() const { return major >= AMPERE_SM_MAJOR; }
 
-        [[nodiscard]] constexpr bool isPostHopper() const { return major >= HOPPER_SM_MAJOR; }
+        [[nodiscard]] constexpr bool IsPostHopper() const { return major >= HOPPER_SM_MAJOR; }
     };
 
     CudaComputeCapabilities GetCudaComputeCapabilities() {
diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp
index e200ebc8..3fda9d62 100644
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@@ -8,6 +8,25 @@
 #include "backend.h"
 #include "hardware.h"
 
+
+void huggingface::tgi::backends::InitializeLogging() {
+#ifdef NDEBUG
+    if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
+        std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
+        std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
+            return std::tolower(c);
+        });
+
+        if (log_level == "debug")
+            spdlog::set_level(spdlog::level::debug);
+        else
+            spdlog::set_level(spdlog::level::info);
+    }
+#else
+    spdlog::set_level(spdlog::level::debug);
+#endif
+}
+
 void huggingface::tgi::backends::InitializeBackend() {
     SPDLOG_INFO("Initializing Backend...");
     nvmlInit_v2();
@@ -25,7 +44,8 @@ void huggingface::tgi::backends::InitializeBackend() {
 }
 
 [[nodiscard]]
-tle::ParallelConfig GetParallelConfig(const size_t worldSize, std::string workerPath) {
+tle::ParallelConfig
+huggingface::tgi::backends::GetParallelConfig(const size_t worldSize, const std::string workerPath) noexcept {
     auto mode = tle::CommunicationMode::kLEADER;
     std::optional<tle::OrchestratorConfig> orchestratorConfig = std::nullopt;
 
@@ -53,7 +73,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
 
     // Define some configuration variables
     execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
-    execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
+    execConfig.setEnableChunkedContext(computeCapabilities.IsPostAmpere());
     execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
     return execConfig;
 }