mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 08:22:07 +00:00
chore(router): minor refactorings
This commit is contained in:
parent
84f3bf902a
commit
56106b4c27
@ -44,23 +44,7 @@ namespace huggingface::tgi::backends {
|
|||||||
/**
|
/**
|
||||||
* Initialize logging mechanism
|
* Initialize logging mechanism
|
||||||
*/
|
*/
|
||||||
void InitializeLogging() {
|
void InitializeLogging();
|
||||||
#ifdef NDEBUG
|
|
||||||
if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
|
|
||||||
std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
|
|
||||||
std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
|
|
||||||
return std::tolower(c);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (log_level == "debug")
|
|
||||||
spdlog::set_level(spdlog::level::debug);
|
|
||||||
else
|
|
||||||
spdlog::set_level(spdlog::level::info);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
spdlog::set_level(spdlog::level::debug);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -23,9 +23,9 @@ namespace huggingface::hardware::cuda {
|
|||||||
int32_t major;
|
int32_t major;
|
||||||
int32_t minor;
|
int32_t minor;
|
||||||
|
|
||||||
[[nodiscard]] constexpr bool isPostAmpere() const { return major >= AMPERE_SM_MAJOR; }
|
[[nodiscard]] constexpr bool IsPostAmpere() const { return major >= AMPERE_SM_MAJOR; }
|
||||||
|
|
||||||
[[nodiscard]] constexpr bool isPostHopper() const { return major >= HOPPER_SM_MAJOR; }
|
[[nodiscard]] constexpr bool IsPostHopper() const { return major >= HOPPER_SM_MAJOR; }
|
||||||
};
|
};
|
||||||
|
|
||||||
CudaComputeCapabilities GetCudaComputeCapabilities() {
|
CudaComputeCapabilities GetCudaComputeCapabilities() {
|
||||||
|
@ -8,6 +8,25 @@
|
|||||||
#include "backend.h"
|
#include "backend.h"
|
||||||
#include "hardware.h"
|
#include "hardware.h"
|
||||||
|
|
||||||
|
|
||||||
|
void huggingface::tgi::backends::InitializeLogging() {
|
||||||
|
#ifdef NDEBUG
|
||||||
|
if (const auto TRTLLM_LOG_LEVEL_CSTR = std::getenv("TRTLLM_LOG_LEVEL")) {
|
||||||
|
std::string log_level(TRTLLM_LOG_LEVEL_CSTR);
|
||||||
|
std::transform(log_level.begin(), log_level.end(), log_level.begin(), [](unsigned char c) {
|
||||||
|
return std::tolower(c);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (log_level == "debug")
|
||||||
|
spdlog::set_level(spdlog::level::debug);
|
||||||
|
else
|
||||||
|
spdlog::set_level(spdlog::level::info);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
spdlog::set_level(spdlog::level::debug);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void huggingface::tgi::backends::InitializeBackend() {
|
void huggingface::tgi::backends::InitializeBackend() {
|
||||||
SPDLOG_INFO("Initializing Backend...");
|
SPDLOG_INFO("Initializing Backend...");
|
||||||
nvmlInit_v2();
|
nvmlInit_v2();
|
||||||
@ -25,7 +44,8 @@ void huggingface::tgi::backends::InitializeBackend() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]]
|
[[nodiscard]]
|
||||||
tle::ParallelConfig GetParallelConfig(const size_t worldSize, std::string workerPath) {
|
tle::ParallelConfig
|
||||||
|
huggingface::tgi::backends::GetParallelConfig(const size_t worldSize, const std::string workerPath) noexcept {
|
||||||
auto mode = tle::CommunicationMode::kLEADER;
|
auto mode = tle::CommunicationMode::kLEADER;
|
||||||
std::optional<tle::OrchestratorConfig> orchestratorConfig = std::nullopt;
|
std::optional<tle::OrchestratorConfig> orchestratorConfig = std::nullopt;
|
||||||
|
|
||||||
@ -53,7 +73,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
|
|||||||
|
|
||||||
// Define some configuration variables
|
// Define some configuration variables
|
||||||
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
|
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
|
||||||
execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
|
execConfig.setEnableChunkedContext(computeCapabilities.IsPostAmpere());
|
||||||
execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
|
execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
|
||||||
return execConfig;
|
return execConfig;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user