(backend) cleanup a bit

Author: Morgan Funtowicz
Date: 2024-08-02 22:14:03 +00:00
Committer: Morgan Funtowicz
Parent: 38b5263c61
Commit: b8a40a0af3
2 changed files with 5 additions and 2 deletions

@@ -23,6 +23,8 @@ namespace huggingface::tgi::backends {
using RequestId = tle::IdType;
using TokenId = tle::TokenIdType;
const static auto OUTPUT_CONFIG = tle::OutputConfig(true, false, false, true, false);
/**
* Initialize all the components required by TRTLLM.
* It is required to call this function before attempting to load any engine
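The OUTPUT_CONFIG constant in this hunk packs five positional booleans. As a readability aid, here is a minimal sketch of the same value with the flag names spelled out; the parameter order (returnLogProbs, returnContextLogits, returnGenerationLogits, excludeInputFromOutput, returnEncoderOutput) is an assumption based on the TensorRT-LLM executor API, not something stated in this diff.

    // Hedged sketch only: flag names and their order are assumed; verify against
    // the installed tensorrt_llm headers.
    #include <tensorrt_llm/executor/executor.h>

    namespace tle = tensorrt_llm::executor;

    // Same value as OUTPUT_CONFIG, with the assumed meaning of each flag spelled out.
    static const auto OUTPUT_CONFIG_SKETCH = tle::OutputConfig(
            /* returnLogProbs = */ true,          // stream per-token log-probabilities
            /* returnContextLogits = */ false,
            /* returnGenerationLogits = */ false,
            /* excludeInputFromOutput = */ true,  // only generated tokens in the response
            /* returnEncoderOutput = */ false);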

@@ -12,6 +12,7 @@ void huggingface::tgi::backends::InitializeBackend() {
nvmlInit_v2();
initTrtLlmPlugins();
SPDLOG_INFO("Backend Executor Version: {}", tle::version());
const auto numGpus = huggingface::hardware::cuda::GetNumDevices();
if (numGpus.has_value()) {
SPDLOG_INFO("Detected {:d} Nvidia GPU(s)", numGpus.value());
@@ -22,7 +23,7 @@ void huggingface::tgi::backends::InitializeBackend() {
[[nodiscard]]
tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &config, const std::string &workerPath) {
- tle::ExecutorConfig execConfig(1);
+ tle::ExecutorConfig execConfig(/* maxBeamWidth = */ 1);
// Retrieve the compute capabilities to enable some options at runtime
const auto computeCapabilities = huggingface::hardware::cuda::GetCudaComputeCapabilities();
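Replacing the bare 1 with /* maxBeamWidth = */ 1 documents what the single positional argument of tle::ExecutorConfig means. As a sketch, the same value could be read from the engine's JSON config instead of being hard-coded; the max_beam_width key and the nlohmann::json alias below are assumptions, not taken from this repository.

    // Hedged sketch: hypothetical config lookup; only the ExecutorConfig call mirrors the diff.
    #include <nlohmann/json.hpp>
    #include <tensorrt_llm/executor/executor.h>

    namespace tle = tensorrt_llm::executor;
    using json = nlohmann::json;  // assumption: matches the `json` alias used above

    tle::ExecutorConfig MakeExecutorConfigSketch(const json &config) {
        // Default to a single beam when the engine config does not specify one.
        const int maxBeamWidth = config.value("max_beam_width", 1);
        return tle::ExecutorConfig(/* maxBeamWidth = */ maxBeamWidth);
    }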
@@ -60,7 +61,7 @@ tle::SamplingConfig huggingface::tgi::backends::GetSamplingConfig(
const float_t temperature,
const float_t repetition_penalty,
const float_t frequency_penalty,
- const uint64_t seed) {
+ const uint64_t seed) noexcept {
return tle::SamplingConfig(
1, // TGI only use a single beam
topK,
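Taken together, OUTPUT_CONFIG and the sampling config returned by GetSamplingConfig end up attached to each request handed to the TensorRT-LLM executor. A sketch of that call site follows; the tle::Request argument order (input tokens, max new tokens, streaming flag, sampling config, output config), the 128-token budget, and Executor::enqueueRequest are assumptions to be checked against the installed headers.

    // Hedged sketch of a submission path; not this backend's actual code.
    #include <vector>
    #include <tensorrt_llm/executor/executor.h>

    namespace tle = tensorrt_llm::executor;

    tle::IdType SubmitSketch(tle::Executor &executor,
                             const std::vector<tle::TokenIdType> &inputTokens,
                             const tle::SamplingConfig &sampling,      // e.g. GetSamplingConfig(...)
                             const tle::OutputConfig &outputConfig) {  // e.g. OUTPUT_CONFIG
        // Single-beam streaming generation with an illustrative 128 new-token budget.
        const auto request = tle::Request(inputTokens, /* maxNewTokens = */ 128,
                                          /* streaming = */ true, sampling, outputConfig);
        return executor.enqueueRequest(request);
    }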