From b8a40a0af3e1b781dd268dfafeda3186400beeb5 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz <morgan@huggingface.co>
Date: Fri, 2 Aug 2024 22:14:03 +0000
Subject: [PATCH] (backend) clean up a bit

---
 backends/trtllm/include/backend.h | 2 ++
 backends/trtllm/lib/backend.cpp   | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/backends/trtllm/include/backend.h b/backends/trtllm/include/backend.h
index bb31daa9..9fda8f87 100644
--- a/backends/trtllm/include/backend.h
+++ b/backends/trtllm/include/backend.h
@@ -23,6 +23,8 @@ namespace huggingface::tgi::backends {
     using RequestId = tle::IdType;
     using TokenId = tle::TokenIdType;
 
+    inline const auto OUTPUT_CONFIG = tle::OutputConfig(true, false, false, true, false);  // inline: one definition shared by every TU including this header
+
     /**
      * Initialize all the components required by TRTLLM.
      * It is required to call this function before attempting to load any engine
diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp
index dc9ffdaa..2eca477f 100644
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@@ -12,6 +12,7 @@ void huggingface::tgi::backends::InitializeBackend() {
     nvmlInit_v2();
     initTrtLlmPlugins();
 
+    SPDLOG_INFO("Backend Executor Version: {}", tle::version());
     const auto numGpus = huggingface::hardware::cuda::GetNumDevices();
     if (numGpus.has_value()) {
         SPDLOG_INFO("Detected {:d} Nvidia GPU(s)", numGpus.value());
@@ -22,7 +23,7 @@ void huggingface::tgi::backends::InitializeBackend() {
 
 [[nodiscard]]
 tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &config, const std::string &workerPath) {
-    tle::ExecutorConfig execConfig(1);
+    tle::ExecutorConfig execConfig(/* maxBeamWidth = */ 1);
 
     // Retrieve the compute capabilities to enable some options at runtime
     const auto computeCapabilities = huggingface::hardware::cuda::GetCudaComputeCapabilities();
@@ -60,7 +61,7 @@ tle::SamplingConfig huggingface::tgi::backends::GetSamplingConfig(
         const float_t temperature,
         const float_t repetition_penalty,
         const float_t frequency_penalty,
-        const uint64_t seed) {
+        const uint64_t seed) noexcept {
     return tle::SamplingConfig(
             1,  // TGI only use a single beam
             topK,