From c94b9de4457b6d87a62edffb31964d73295c19b9 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 3 Dec 2024 14:07:49 +0100 Subject: [PATCH] feat(backend): add guard to multiple header definitions --- backends/trtllm/csrc/backend.cpp | 9 ++++++--- backends/trtllm/csrc/backend.hpp | 16 ++++++++-------- backends/trtllm/csrc/hardware.hpp | 6 ++++-- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/backends/trtllm/csrc/backend.cpp b/backends/trtllm/csrc/backend.cpp index e593f439..5e52261e 100644 --- a/backends/trtllm/csrc/backend.cpp +++ b/backends/trtllm/csrc/backend.cpp @@ -1,9 +1,11 @@ #include -#include "backend.hpp" #include #include +#include "backend.hpp" +#include "hardware.hpp" + namespace huggingface::tgi::backends::trtllm { constexpr tle::ParallelConfig backend_workspace_t::parallel_config() const { // Single engine (TP = PP = 1) -> using leader mode (no MPI involved) @@ -23,7 +25,8 @@ namespace huggingface::tgi::backends::trtllm { return tle::ParallelConfig(tle::CommunicationType::kMPI, mode, std::nullopt, std::nullopt, orchestratorConfig); } - constexpr tle::ExecutorConfig backend_workspace_t::executor_config() const { + + tle::ExecutorConfig backend_workspace_t::executor_config() const { // Retrieve the compute capabilities to enable some options at runtime const auto compute_capabilities = hardware::cuda::compute_capabilities_t(); @@ -73,4 +76,4 @@ namespace huggingface::tgi::backends::trtllm { SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id); executor_.cancelRequest(request_id); } -} \ No newline at end of file +} diff --git a/backends/trtllm/csrc/backend.hpp b/backends/trtllm/csrc/backend.hpp index b6923782..f2f8673e 100644 --- a/backends/trtllm/csrc/backend.hpp +++ b/backends/trtllm/csrc/backend.hpp @@ -1,7 +1,8 @@ -#pragma once +#ifndef TGI_BACKEND_TRTLLM +#define TGI_BACKEND_TRTLLM + #include #include -#include #include #include #include @@ -13,8 +14,6 @@ #include -#include - namespace huggingface::tgi::backends::trtllm { namespace tle = tensorrt_llm::executor; using json = nlohmann::json; @@ -68,7 +67,7 @@ namespace huggingface::tgi::backends::trtllm { float_t temperature; std::list> stop_words; - explicit generation_config_t(const json &config): + constexpr explicit generation_config_t(const json &config): top_p(config.value("top_p", 1.0f)), temperature( config.value("temperature", 1.0f)), stop_words(0) { if(config.contains("/eos_token_id"_json) && config["/eos_token_id"_json].is_array()) { const auto& eos_token_id = config["eos_token_id"]; @@ -121,7 +120,7 @@ namespace huggingface::tgi::backends::trtllm { * `generation_config.json` holding default generation parameters. * @return `generation_config_t` */ - [[nodiscard]] const generation_config_t& generation_config() const { return generation_config_; } + [[nodiscard]] constexpr const generation_config_t& generation_config() const { return generation_config_; } /** * Factory method returning new `tensorrt_llm::executor::ParallelConfig` instance used @@ -135,7 +134,7 @@ namespace huggingface::tgi::backends::trtllm { * to initialize `tensorrt_llm::executor::Executor` * @return `tensorrt_llm::executor::ExecutorConfig` instance */ - [[nodiscard]] constexpr tle::ExecutorConfig executor_config() const; + [[nodiscard]] tle::ExecutorConfig executor_config() const; }; /** @@ -220,4 +219,5 @@ template <> struct fmt::formatter #include @@ -77,4 +78,5 @@ namespace huggingface::tgi::hardware::cuda { */ [[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); } }; -} \ No newline at end of file +} +#endif \ No newline at end of file