From b8a40a0af3e1b781dd268dfafeda3186400beeb5 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 2 Aug 2024 22:14:03 +0000 Subject: [PATCH] (backend) cleanup a bit --- backends/trtllm/include/backend.h | 2 ++ backends/trtllm/lib/backend.cpp | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/backends/trtllm/include/backend.h b/backends/trtllm/include/backend.h index bb31daa9..9fda8f87 100644 --- a/backends/trtllm/include/backend.h +++ b/backends/trtllm/include/backend.h @@ -23,6 +23,8 @@ namespace huggingface::tgi::backends { using RequestId = tle::IdType; using TokenId = tle::TokenIdType; + const static auto OUTPUT_CONFIG = tle::OutputConfig(true, false, false, true, false); + /** * Initialize all the components required by TRTLLM. * It is required to call this function before attempting to load any engine diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp index dc9ffdaa..2eca477f 100644 --- a/backends/trtllm/lib/backend.cpp +++ b/backends/trtllm/lib/backend.cpp @@ -12,6 +12,7 @@ void huggingface::tgi::backends::InitializeBackend() { nvmlInit_v2(); initTrtLlmPlugins(); + SPDLOG_INFO("Backend Executor Version: {}", tle::version()); const auto numGpus = huggingface::hardware::cuda::GetNumDevices(); if (numGpus.has_value()) { SPDLOG_INFO("Detected {:d} Nvidia GPU(s)", numGpus.value()); @@ -22,7 +23,7 @@ void huggingface::tgi::backends::InitializeBackend() { [[nodiscard]] tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &config, const std::string &workerPath) { - tle::ExecutorConfig execConfig(1); + tle::ExecutorConfig execConfig(/* maxBeamWidth = */ 1); // Retrieve the compute capabilities to enable some options at runtime const auto computeCapabilities = huggingface::hardware::cuda::GetCudaComputeCapabilities(); @@ -60,7 +61,7 @@ tle::SamplingConfig huggingface::tgi::backends::GetSamplingConfig( const float_t temperature, const float_t repetition_penalty, const float_t frequency_penalty, - const uint64_t seed) { + const uint64_t seed) noexcept { return tle::SamplingConfig( 1, // TGI only use a single beam topK,