feat(backend): add guard to multiple header definitions

Morgan Funtowicz 2024-12-03 14:07:49 +01:00
parent 16ba2f5a2b
commit c94b9de445
3 changed files with 18 additions and 13 deletions
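
The change swaps #pragma once for classic include guards in the two headers below (backend.hpp and hardware.hpp) and moves the hardware.hpp include out of backend.hpp into backend.cpp. As a minimal sketch of the guard pattern, with the TGI_BACKEND_TRTLLM macro name taken from the diff:

    #ifndef TGI_BACKEND_TRTLLM    // true only on the first expansion of this header
    #define TGI_BACKEND_TRTLLM

    // ... header contents (includes, declarations) ...

    #endif // TGI_BACKEND_TRTLLM

Unlike #pragma once, which is non-standard (though widely supported) and deduplicates by file identity, a named guard is portable and keyed on the macro, so it also holds when the same header is reachable through several filesystem paths.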

backend.cpp

@@ -1,9 +1,11 @@
 #include <ranges>
-#include "backend.hpp"
 
 #include <nlohmann/json.hpp>
 #include <spdlog/spdlog.h>
 
+#include "backend.hpp"
+#include "hardware.hpp"
+
 namespace huggingface::tgi::backends::trtllm {
     constexpr tle::ParallelConfig backend_workspace_t::parallel_config() const {
         // Single engine (TP = PP = 1) -> using leader mode (no MPI involved)
@@ -23,7 +25,8 @@ namespace huggingface::tgi::backends::trtllm {
         return tle::ParallelConfig(tle::CommunicationType::kMPI, mode, std::nullopt, std::nullopt, orchestratorConfig);
     }
 
-    constexpr tle::ExecutorConfig backend_workspace_t::executor_config() const {
+    tle::ExecutorConfig backend_workspace_t::executor_config() const {
         // Retrieve the compute capabilities to enable some options at runtime
         const auto compute_capabilities = hardware::cuda::compute_capabilities_t();
@@ -73,4 +76,4 @@ namespace huggingface::tgi::backends::trtllm {
         SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id);
         executor_.cancelRequest(request_id);
     }
-}
\ No newline at end of file
+}
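
The second hunk drops constexpr from the executor_config() definition (mirrored in its declaration in backend.hpp below): the function queries compute capabilities at runtime and builds a tle::ExecutorConfig, a library type whose constructor cannot be constant-evaluated, and a constexpr function that can never be evaluated as a constant expression is ill-formed, no diagnostic required. The last hunk appears to only add the missing newline at end of file. A self-contained illustration of the constexpr point, using a hypothetical stub type (executor_config_stub and make_config are not names from the repository):

    #include <cstdio>

    // Stands in for tensorrt_llm::executor::ExecutorConfig: its constructor is not
    // constexpr, so no call to it can appear in a constant expression.
    struct executor_config_stub {
        explicit executor_config_stub(int max_batch_size) : max_batch_size(max_batch_size) {
            std::printf("configuring executor at runtime\n");  // runtime side effect
        }
        int max_batch_size;
    };

    // Marking this constexpr would be ill-formed, no diagnostic required, since no
    // invocation could ever be a constant expression; that is why the qualifier is
    // removed from executor_config() here.
    executor_config_stub make_config() { return executor_config_stub(128); }

    int main() { return make_config().max_batch_size == 128 ? 0 : 1; }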

backend.hpp

@@ -1,7 +1,8 @@
-#pragma once
+#ifndef TGI_BACKEND_TRTLLM
+#define TGI_BACKEND_TRTLLM
 #include <cmath>
 #include <cstdint>
 #include <exception>
 #include <expected>
 #include <fstream>
 #include <list>
@@ -13,8 +14,6 @@
 #include <tensorrt_llm/executor/executor.h>
 
-#include <hardware.hpp>
-
 namespace huggingface::tgi::backends::trtllm {
     namespace tle = tensorrt_llm::executor;
     using json = nlohmann::json;
 
@@ -68,7 +67,7 @@ namespace huggingface::tgi::backends::trtllm {
         float_t temperature;
         std::list<std::vector<int32_t>> stop_words;
 
-        explicit generation_config_t(const json &config):
+        constexpr explicit generation_config_t(const json &config):
                 top_p(config.value("top_p", 1.0f)), temperature(config.value("temperature", 1.0f)), stop_words(0) {
             if (config.contains("/eos_token_id"_json_pointer) && config["/eos_token_id"_json_pointer].is_array()) {
                 const auto& eos_token_id = config["eos_token_id"];
@@ -121,7 +120,7 @@ namespace huggingface::tgi::backends::trtllm {
         * `generation_config.json` holding default generation parameters.
         * @return `generation_config_t`
         */
-        [[nodiscard]] const generation_config_t& generation_config() const { return generation_config_; }
+        [[nodiscard]] constexpr const generation_config_t& generation_config() const { return generation_config_; }
 
        /**
        * Factory method returning new `tensorrt_llm::executor::ParallelConfig` instance used
@@ -135,7 +134,7 @@ namespace huggingface::tgi::backends::trtllm {
         * to initialize `tensorrt_llm::executor::Executor`
         * @return `tensorrt_llm::executor::ExecutorConfig` instance
         */
-        [[nodiscard]] constexpr tle::ExecutorConfig executor_config() const;
+        [[nodiscard]] tle::ExecutorConfig executor_config() const;
     };
 
    /**
@@ -220,4 +219,5 @@ template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_p
             c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.temperature, c.seed
         );
     }
-};
\ No newline at end of file
+};
+#endif
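
With the guard wrapping the entire header (note the closing #endif lands after the fmt::formatter specialization), a second inclusion expands to nothing. A single-file demonstration of the mechanism; the backend_t struct is illustrative, not taken from the header:

    // First "inclusion": the guard macro is not defined yet, so the body is expanded.
    #ifndef TGI_BACKEND_TRTLLM
    #define TGI_BACKEND_TRTLLM
    struct backend_t { int id; };  // a second expansion would be a redefinition error
    #endif

    // Second "inclusion": TGI_BACKEND_TRTLLM is already defined, the body is skipped,
    // and the translation unit keeps exactly one definition of backend_t.
    #ifndef TGI_BACKEND_TRTLLM
    #define TGI_BACKEND_TRTLLM
    struct backend_t { int id; };
    #endif

    int main() { return backend_t{0}.id; }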

hardware.hpp

@@ -1,4 +1,5 @@
-#pragma once
+#ifndef TGI_HARDWARE_CUDA
+#define TGI_HARDWARE_CUDA
 
 #include <cstdint>
 #include <optional>
@@ -77,4 +78,5 @@ namespace huggingface::tgi::hardware::cuda {
         */
        [[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); }
    };
-}
\ No newline at end of file
+}
+#endif
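
For reference, the is_at_least_hopper() helper visible in the hardware.hpp context delegates to a constexpr is_at_least(HOPPER) comparison. A minimal sketch of how such a capability type can be shaped, assuming a (major, minor) compute-capability pair and Hopper's SM 9.0; the field names and the two-argument is_at_least signature are assumptions, not the repository's actual definitions:

    #include <cstdint>

    struct compute_capabilities_t {
        int32_t major;
        int32_t minor;

        // True when the device's compute capability is at least (other_major, other_minor).
        [[nodiscard]] constexpr bool is_at_least(int32_t other_major, int32_t other_minor) const {
            return major > other_major || (major == other_major && minor >= other_minor);
        }

        // Hopper GPUs (H100 family) report compute capability 9.0.
        [[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(9, 0); }
    };

    // Because the members are constexpr, the checks are usable at compile time:
    static_assert(compute_capabilities_t{9, 0}.is_at_least_hopper());
    static_assert(!compute_capabilities_t{8, 6}.is_at_least_hopper());

    int main() { return 0; }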