From c94b9de4457b6d87a62edffb31964d73295c19b9 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz <morgan@huggingface.co>
Date: Tue, 3 Dec 2024 14:07:49 +0100
Subject: [PATCH] feat(backend): add include guards to prevent multiple header definitions

---
 backends/trtllm/csrc/backend.cpp  |  9 ++++++---
 backends/trtllm/csrc/backend.hpp  | 16 ++++++++--------
 backends/trtllm/csrc/hardware.hpp |  6 ++++--
 3 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/backends/trtllm/csrc/backend.cpp b/backends/trtllm/csrc/backend.cpp
index e593f439..5e52261e 100644
--- a/backends/trtllm/csrc/backend.cpp
+++ b/backends/trtllm/csrc/backend.cpp
@@ -1,9 +1,11 @@
 #include <ranges>
-#include "backend.hpp"
 
 #include <nlohmann/json.hpp>
 #include <spdlog/spdlog.h>
 
+#include "backend.hpp"
+#include "hardware.hpp"
+
 namespace huggingface::tgi::backends::trtllm {
     constexpr tle::ParallelConfig backend_workspace_t::parallel_config() const {
         // Single engine (TP = PP = 1) -> using leader mode (no MPI involved)
@@ -23,7 +25,8 @@ namespace huggingface::tgi::backends::trtllm {
         return tle::ParallelConfig(tle::CommunicationType::kMPI, mode, std::nullopt, std::nullopt, orchestratorConfig);
     }
 
-    constexpr tle::ExecutorConfig backend_workspace_t::executor_config() const {
+
+    tle::ExecutorConfig backend_workspace_t::executor_config() const {
         // Retrieve the compute capabilities to enable some options at runtime
         const auto compute_capabilities = hardware::cuda::compute_capabilities_t();
 
@@ -73,4 +76,4 @@ namespace huggingface::tgi::backends::trtllm {
         SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id);
         executor_.cancelRequest(request_id);
     }
-}
\ No newline at end of file
+}
diff --git a/backends/trtllm/csrc/backend.hpp b/backends/trtllm/csrc/backend.hpp
index b6923782..f2f8673e 100644
--- a/backends/trtllm/csrc/backend.hpp
+++ b/backends/trtllm/csrc/backend.hpp
@@ -1,7 +1,8 @@
-#pragma once
+#ifndef TGI_BACKEND_TRTLLM
+#define TGI_BACKEND_TRTLLM
+
 #include <cmath>
 #include <cstdint>
-#include <exception>
 #include <expected>
 #include <fstream>
 #include <list>
@@ -13,8 +14,6 @@
 
 #include <tensorrt_llm/executor/executor.h>
 
-#include <hardware.hpp>
-
 namespace huggingface::tgi::backends::trtllm {
     namespace tle = tensorrt_llm::executor;
     using json = nlohmann::json;
@@ -68,7 +67,7 @@ namespace huggingface::tgi::backends::trtllm {
         float_t temperature;
         std::list<std::vector<int32_t>> stop_words;
 
-        explicit generation_config_t(const json &config):
+        constexpr explicit generation_config_t(const json &config):
             top_p(config.value("top_p", 1.0f)), temperature( config.value("temperature", 1.0f)), stop_words(0) {
             if(config.contains("/eos_token_id"_json) && config["/eos_token_id"_json].is_array()) {
                 const auto& eos_token_id = config["eos_token_id"];
@@ -121,7 +120,7 @@ namespace huggingface::tgi::backends::trtllm {
          * `generation_config.json` holding default generation parameters.
          * @return `generation_config_t`
          */
-        [[nodiscard]] const generation_config_t& generation_config() const { return generation_config_; }
+        [[nodiscard]] constexpr const generation_config_t& generation_config() const { return generation_config_; }
 
 /**
          * Factory method returning new `tensorrt_llm::executor::ParallelConfig` instance used
@@ -135,7 +134,7 @@ namespace huggingface::tgi::backends::trtllm {
          * to initialize `tensorrt_llm::executor::Executor`
          * @return `tensorrt_llm::executor::ExecutorConfig` instance
          */
-        [[nodiscard]] constexpr tle::ExecutorConfig executor_config() const;
+        [[nodiscard]] tle::ExecutorConfig executor_config() const;
     };
 
     /**
@@ -220,4 +219,5 @@ template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_p
                 c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.temperature, c.seed
         );
     }
-};
\ No newline at end of file
+};
+#endif
\ No newline at end of file
diff --git a/backends/trtllm/csrc/hardware.hpp b/backends/trtllm/csrc/hardware.hpp
index b7000885..480cf680 100644
--- a/backends/trtllm/csrc/hardware.hpp
+++ b/backends/trtllm/csrc/hardware.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef TGI_HARDWARE_CUDA
+#define TGI_HARDWARE_CUDA
 #include <cstdint>
 #include <optional>
 
@@ -77,4 +78,5 @@ namespace huggingface::tgi::hardware::cuda {
          */
         [[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); }
     };
-}
\ No newline at end of file
+}
+#endif
\ No newline at end of file