chore(router): minor refactorings

This commit is contained in:
Morgan Funtowicz 2024-10-22 23:05:10 +02:00
parent 84f3bf902a
commit 56106b4c27
3 changed files with 25 additions and 21 deletions

View File

@ -44,23 +44,7 @@ namespace huggingface::tgi::backends {
/**
* Initialize logging mechanism
*/
void InitializeLogging() {
#ifdef NDEBUG
    // Builds with NDEBUG: verbosity is driven by the TRTLLM_LOG_LEVEL environment variable.
    // When the variable is absent the spdlog level is left untouched.
    const char *const rawLevel = std::getenv("TRTLLM_LOG_LEVEL");
    if (!rawLevel)
        return;

    // Lower-case the value so the comparison below is case-insensitive.
    std::string level(rawLevel);
    std::transform(level.begin(), level.end(), level.begin(),
                   [](unsigned char ch) { return std::tolower(ch); });

    // Only "debug" is recognised; every other value selects info.
    const bool wantsDebug = (level == "debug");
    spdlog::set_level(wantsDebug ? spdlog::level::debug : spdlog::level::info);
#else
    // Builds without NDEBUG always log at debug level.
    spdlog::set_level(spdlog::level::debug);
#endif
}
void InitializeLogging();
/**

View File

@ -23,9 +23,9 @@ namespace huggingface::hardware::cuda {
int32_t major;
int32_t minor;
/// True when `major` is greater than or equal to AMPERE_SM_MAJOR (the boundary value itself is included).
[[nodiscard]] constexpr bool isPostAmpere() const {
    return major >= AMPERE_SM_MAJOR;
}
/// True when `major` is greater than or equal to AMPERE_SM_MAJOR (the boundary value itself is included).
[[nodiscard]] constexpr bool IsPostAmpere() const {
    return major >= AMPERE_SM_MAJOR;
}
/// True when `major` is greater than or equal to HOPPER_SM_MAJOR (the boundary value itself is included).
[[nodiscard]] constexpr bool isPostHopper() const {
    return major >= HOPPER_SM_MAJOR;
}
/// True when `major` is greater than or equal to HOPPER_SM_MAJOR (the boundary value itself is included).
[[nodiscard]] constexpr bool IsPostHopper() const {
    return major >= HOPPER_SM_MAJOR;
}
};
CudaComputeCapabilities GetCudaComputeCapabilities() {

View File

@ -8,6 +8,25 @@
#include "backend.h"
#include "hardware.h"
/**
 * Configure the global spdlog verbosity.
 *
 * Without NDEBUG the level is always set to debug. With NDEBUG the
 * TRTLLM_LOG_LEVEL environment variable is consulted: a case-insensitive
 * "debug" selects debug, any other value selects info, and an unset
 * variable leaves the current level unchanged.
 */
void huggingface::tgi::backends::InitializeLogging() {
#ifndef NDEBUG
    spdlog::set_level(spdlog::level::debug);
#else
    const char *requested = std::getenv("TRTLLM_LOG_LEVEL");
    if (requested == nullptr)
        return;

    // Normalise to lower case; tolower must be given a value representable as unsigned char.
    std::string normalized(requested);
    for (char &ch : normalized)
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));

    spdlog::set_level(normalized == "debug" ? spdlog::level::debug : spdlog::level::info);
#endif
}
void huggingface::tgi::backends::InitializeBackend() {
SPDLOG_INFO("Initializing Backend...");
nvmlInit_v2();
@ -25,7 +44,8 @@ void huggingface::tgi::backends::InitializeBackend() {
}
[[nodiscard]]
tle::ParallelConfig GetParallelConfig(const size_t worldSize, std::string workerPath) {
tle::ParallelConfig
huggingface::tgi::backends::GetParallelConfig(const size_t worldSize, const std::string workerPath) noexcept {
auto mode = tle::CommunicationMode::kLEADER;
std::optional<tle::OrchestratorConfig> orchestratorConfig = std::nullopt;
@ -53,7 +73,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
// Define some configuration variables
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
execConfig.setEnableChunkedContext(computeCapabilities.IsPostAmpere());
execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
return execConfig;
}