chore(router): minor refactorings

2025-09-17 15:24:52 +00:00 · 2024-10-22 23:05:10 +02:00 · 2024-10-22 23:05:10 +02:00 · 56106b4c27
commit 56106b4c27
parent 84f3bf902a
3 changed files with 25 additions and 21 deletions
--- a/backends/trtllm/include/backend.h
+++ b/backends/trtllm/include/backend.h
@ -44,23 +44,7 @@ namespace huggingface::tgi::backends {
    /**
     * Initialize logging mechanism
     */
-    void InitializeLogging() {
+    void InitializeLogging();
 #ifdef NDEBUG
        if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
        std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
        std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
            return std::tolower(c);
        });
        if (log_level == "debug")
            spdlog::set_level(spdlog::level::debug);
        else
            spdlog::set_level(spdlog::level::info);
    }
 #else
        spdlog::set_level(spdlog::level::debug);
 #endif
    }
    /**
--- a/backends/trtllm/include/hardware.h
+++ b/backends/trtllm/include/hardware.h
@ -23,9 +23,9 @@ namespace huggingface::hardware::cuda {
        int32_t major;
        int32_t minor;
-        [[nodiscard]] constexpr bool isPostAmpere() const { return major >= AMPERE_SM_MAJOR; }
+        [[nodiscard]] constexpr bool IsPostAmpere() const { return major >= AMPERE_SM_MAJOR; }
-        [[nodiscard]] constexpr bool isPostHopper() const { return major >= HOPPER_SM_MAJOR; }
+        [[nodiscard]] constexpr bool IsPostHopper() const { return major >= HOPPER_SM_MAJOR; }
    };
    CudaComputeCapabilities GetCudaComputeCapabilities() {
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@ -8,6 +8,25 @@
 #include "backend.h"
 #include "hardware.h"
 /**
  * Configure the global spdlog log level for the backend.
  *
  * When NDEBUG is defined, the level is driven by the TRTLLM_LOG_LEVEL
  * environment variable: its value is lower-cased and compared with "debug".
  * A match selects spdlog::level::debug; any other value selects
  * spdlog::level::info. If the variable is not set, no level is set here and
  * whatever level spdlog currently has is left in place.
  *
  * When NDEBUG is not defined, the level is unconditionally set to
  * spdlog::level::debug and the environment variable is not consulted.
  */
 void huggingface::tgi::backends::InitializeLogging() {
 #ifdef NDEBUG
    // std::getenv returns a null pointer when the variable is absent, which skips the whole branch
    if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
        std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
        // Lower-case in place so the comparison below is case-insensitive. The lambda takes
        // unsigned char because std::tolower requires a value representable as unsigned char (or EOF).
        std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
            return std::tolower(c);
        });
        // Only "debug" is recognised; every other value (including other level names) maps to info
        if (log_level == "debug")
            spdlog::set_level(spdlog::level::debug);
        else
            spdlog::set_level(spdlog::level::info);
    }
 #else
    // NDEBUG not defined: always log at debug level
    spdlog::set_level(spdlog::level::debug);
 #endif
 }
 void huggingface::tgi::backends::InitializeBackend() {
    SPDLOG_INFO("Initializing Backend...");
    nvmlInit_v2();
@ -25,7 +44,8 @@ void huggingface::tgi::backends::InitializeBackend() {
 }
 [[nodiscard]]
-tle::ParallelConfig GetParallelConfig(const size_t worldSize, std::string workerPath) {
+tle::ParallelConfig
 huggingface::tgi::backends::GetParallelConfig(const size_t worldSize, const std::string workerPath) noexcept {
    auto mode = tle::CommunicationMode::kLEADER;
    std::optional<tle::OrchestratorConfig> orchestratorConfig = std::nullopt;
@ -53,7 +73,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
    // Define some configuration variables
    execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
-    execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
+    execConfig.setEnableChunkedContext(computeCapabilities.IsPostAmpere());
    execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
    return execConfig;
 }