Mirror of https://github.com/huggingface/text-generation-inference.git
Synced 2025-06-10 03:12:07 +00:00
feat(backend): add guard to multiple header definitions
parent 16ba2f5a2b
commit c94b9de445
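The change is the classic preprocessor include-guard idiom, applied to the two headers touched by this diff (one implementation file and two headers are affected). A minimal sketch of the pattern; the guard macro name is taken from the diff below, the body is purely illustrative:

// Sketch of the include-guard pattern this commit adds. On the first #include
// the macro is undefined, so the body is compiled and the macro gets defined;
// any later #include of the same header in the translation unit is skipped,
// preventing duplicate definitions.
#ifndef TGI_BACKEND_TRTLLM
#define TGI_BACKEND_TRTLLM

// ... header contents (generation_config_t, backend_workspace_t, ...) ...

#endif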
@@ -1,9 +1,11 @@
 #include <ranges>
-#include "backend.hpp"
 
 #include <nlohmann/json.hpp>
 #include <spdlog/spdlog.h>
 
+#include "backend.hpp"
+#include "hardware.hpp"
+
 namespace huggingface::tgi::backends::trtllm {
     constexpr tle::ParallelConfig backend_workspace_t::parallel_config() const {
         // Single engine (TP = PP = 1) -> using leader mode (no MPI involved)
@@ -23,7 +25,8 @@ namespace huggingface::tgi::backends::trtllm {
         return tle::ParallelConfig(tle::CommunicationType::kMPI, mode, std::nullopt, std::nullopt, orchestratorConfig);
     }
 
-    constexpr tle::ExecutorConfig backend_workspace_t::executor_config() const {
+
+    tle::ExecutorConfig backend_workspace_t::executor_config() const {
         // Retrieve the compute capabilities to enable some options at runtime
         const auto compute_capabilities = hardware::cuda::compute_capabilities_t();
 
@@ -73,4 +76,4 @@ namespace huggingface::tgi::backends::trtllm {
         SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id);
         executor_.cancelRequest(request_id);
     }
-}
+}
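The hunks above belong to the backend implementation file; the remaining hunks touch the two headers it includes, backend.hpp and hardware.hpp. Besides the guard work, this file also drops constexpr from backend_workspace_t::executor_config(), presumably because, as the in-code comment says, it retrieves the compute capabilities at runtime, and a constant expression cannot perform such a query. A hypothetical, minimal illustration of that distinction (not TGI code):

#include <cstdint>

struct workspace {
    // Fine as constexpr: the result depends only on compile-time-known members.
    constexpr int32_t world_size() const { return tp_ * pp_; }

    // Left as a plain runtime function: it has to ask the driver/device,
    // which cannot happen during constant evaluation.
    int32_t compute_capability() const;

    int32_t tp_ = 1;
    int32_t pp_ = 1;
};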
@@ -1,7 +1,8 @@
-#pragma once
+#ifndef TGI_BACKEND_TRTLLM
+#define TGI_BACKEND_TRTLLM
 
 #include <cmath>
 #include <cstdint>
 #include <exception>
 #include <expected>
 #include <fstream>
 #include <list>
@@ -13,8 +14,6 @@
 
 #include <tensorrt_llm/executor/executor.h>
 
-#include <hardware.hpp>
-
 namespace huggingface::tgi::backends::trtllm {
     namespace tle = tensorrt_llm::executor;
     using json = nlohmann::json;
@@ -68,7 +67,7 @@ namespace huggingface::tgi::backends::trtllm {
         float_t temperature;
         std::list<std::vector<int32_t>> stop_words;
 
-        explicit generation_config_t(const json &config):
+        constexpr explicit generation_config_t(const json &config):
             top_p(config.value("top_p", 1.0f)), temperature( config.value("temperature", 1.0f)), stop_words(0) {
             if(config.contains("/eos_token_id"_json) && config["/eos_token_id"_json].is_array()) {
                 const auto& eos_token_id = config["eos_token_id"];
@@ -121,7 +120,7 @@ namespace huggingface::tgi::backends::trtllm {
         * `generation_config.json` holding default generation parameters.
         * @return `generation_config_t`
         */
-        [[nodiscard]] const generation_config_t& generation_config() const { return generation_config_; }
+        [[nodiscard]] constexpr const generation_config_t& generation_config() const { return generation_config_; }
 
        /**
         * Factory method returning new `tensorrt_llm::executor::ParallelConfig` instance used
@@ -135,7 +134,7 @@ namespace huggingface::tgi::backends::trtllm {
         * to initialize `tensorrt_llm::executor::Executor`
         * @return `tensorrt_llm::executor::ExecutorConfig` instance
         */
-        [[nodiscard]] constexpr tle::ExecutorConfig executor_config() const;
+        [[nodiscard]] tle::ExecutorConfig executor_config() const;
    };
 
    /**
@@ -220,4 +219,5 @@ template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_p
            c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.temperature, c.seed
        );
    }
-};
+};
+#endif
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef TGI_HARDWARE_CUDA
+#define TGI_HARDWARE_CUDA
 #include <cstdint>
 #include <optional>
 
@@ -77,4 +78,5 @@ namespace huggingface::tgi::hardware::cuda {
         */
        [[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); }
    };
-}
+}
+#endif
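With the guards in place, including either header more than once in a translation unit is harmless, which is exactly what the commit title is guarding against. A tiny, hypothetical check (file names and the main function are illustrative only):

// Both headers are pulled in twice; the second expansion of each is empty
// because TGI_BACKEND_TRTLLM / TGI_HARDWARE_CUDA are already defined, so no
// redefinition errors occur.
#include "backend.hpp"
#include "hardware.hpp"
#include "backend.hpp"   // skipped by the guard
#include "hardware.hpp"  // skipped by the guard

int main() { return 0; }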