chore(router): minor refactorings

2025-07-05 15:30:19 +00:00 · 2024-10-22 23:05:10 +02:00 · 2024-10-22 23:05:10 +02:00 · 56106b4c27
commit 56106b4c27
parent 84f3bf902a
3 changed files with 25 additions and 21 deletions
--- a/backends/trtllm/include/backend.h
+++ b/backends/trtllm/include/backend.h
@@ -44,23 +44,7 @@ namespace huggingface::tgi::backends {
    /**
     * Initialize logging mechanism
     */
-    void InitializeLogging() {
-#ifdef NDEBUG
-        if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
-        std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
-        std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
-            return std::tolower(c);
-        });
-
-        if (log_level == "debug")
-            spdlog::set_level(spdlog::level::debug);
-        else
-            spdlog::set_level(spdlog::level::info);
-    }
-#else
-        spdlog::set_level(spdlog::level::debug);
-#endif
-    }
+    void InitializeLogging();


    /**
--- a/backends/trtllm/include/hardware.h
+++ b/backends/trtllm/include/hardware.h
@@ -23,9 +23,9 @@ namespace huggingface::hardware::cuda {
        int32_t major;
        int32_t minor;

-        [[nodiscard]] constexpr bool isPostAmpere() const { return major >= AMPERE_SM_MAJOR; }
+        [[nodiscard]] constexpr bool IsPostAmpere() const { return major >= AMPERE_SM_MAJOR; }

-        [[nodiscard]] constexpr bool isPostHopper() const { return major >= HOPPER_SM_MAJOR; }
+        [[nodiscard]] constexpr bool IsPostHopper() const { return major >= HOPPER_SM_MAJOR; }
    };

    CudaComputeCapabilities GetCudaComputeCapabilities() {
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@@ -8,6 +8,25 @@
 #include "backend.h"
 #include "hardware.h"

+
+void huggingface::tgi::backends::InitializeLogging() {
+#ifdef NDEBUG
+    if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
+        std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
+        std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
+            return std::tolower(c);
+        });
+
+        if (log_level == "debug")
+            spdlog::set_level(spdlog::level::debug);
+        else
+            spdlog::set_level(spdlog::level::info);
+    }
+#else
+    spdlog::set_level(spdlog::level::debug);
+#endif
+}
+
 void huggingface::tgi::backends::InitializeBackend() {
    SPDLOG_INFO("Initializing Backend...");
    nvmlInit_v2();
@@ -25,7 +44,8 @@ void huggingface::tgi::backends::InitializeBackend() {
 }

 [[nodiscard]]
-tle::ParallelConfig GetParallelConfig(const size_t worldSize, std::string workerPath) {
+tle::ParallelConfig
+huggingface::tgi::backends::GetParallelConfig(const size_t worldSize, const std::string workerPath) noexcept {
    auto mode = tle::CommunicationMode::kLEADER;
    std::optional<tle::OrchestratorConfig> orchestratorConfig = std::nullopt;

@@ -53,7 +73,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co

    // Define some configuration variables
    execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
-    execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
+    execConfig.setEnableChunkedContext(computeCapabilities.IsPostAmpere());
    execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
    return execConfig;
 }