mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00

(backend) cleanup a bit

This commit is contained in:
parent 38b5263c61
commit b8a40a0af3
@@ -23,6 +23,8 @@ namespace huggingface::tgi::backends {
+    using RequestId = tle::IdType;
+    using TokenId = tle::TokenIdType;
 
     const static auto OUTPUT_CONFIG = tle::OutputConfig(true, false, false, true, false);
 
     /**
      * Initialize all the components required by TRTLLM.
      * It is required to call this function before attempting to load any engine
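The five bare booleans passed to tle::OutputConfig are hard to read at the call site. A sketch of the same constant with inline parameter-name comments, assuming the constructor order of TensorRT-LLM's executor OutputConfig is (returnLogProbs, returnContextLogits, returnGenerationLogits, excludeInputFromOutput, returnEncoderOutput) -- verify against the tensorrt_llm::executor headers before relying on it:

// Sketch only: parameter names are an assumption about tle::OutputConfig's
// constructor order, not taken from the repository.
const static auto OUTPUT_CONFIG = tle::OutputConfig(
    /* returnLogProbs = */ true,
    /* returnContextLogits = */ false,
    /* returnGenerationLogits = */ false,
    /* excludeInputFromOutput = */ true,
    /* returnEncoderOutput = */ false
);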
@@ -12,6 +12,7 @@ void huggingface::tgi::backends::InitializeBackend() {
     nvmlInit_v2();
     initTrtLlmPlugins();
 
+    SPDLOG_INFO("Backend Executor Version: {}", tle::version());
     const auto numGpus = huggingface::hardware::cuda::GetNumDevices();
     if (numGpus.has_value()) {
         SPDLOG_INFO("Detected {:d} Nvidia GPU(s)", numGpus.value());
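GetNumDevices() returns a std::optional, so a failed detection is representable without exceptions and the caller must check has_value() before logging the count. A minimal sketch of such a helper on top of NVML (hypothetical implementation; the repository's own version may differ):

#include <optional>
#include <nvml.h>

// Hypothetical GetNumDevices-style helper: returns std::nullopt when NVML
// cannot enumerate devices instead of throwing. Assumes nvmlInit_v2() has
// already been called, as it is at the top of InitializeBackend().
std::optional<unsigned int> GetNumDevicesSketch() {
    unsigned int count = 0;
    if (nvmlDeviceGetCount_v2(&count) == NVML_SUCCESS) {
        return count;
    }
    return std::nullopt;
}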
@@ -22,7 +23,7 @@ void huggingface::tgi::backends::InitializeBackend() {
 
 [[nodiscard]]
 tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &config, const std::string &workerPath) {
-    tle::ExecutorConfig execConfig(1);
+    tle::ExecutorConfig execConfig(/* maxBeamWidth = */ 1);
 
     // Retrieve the compute capabilities to enable some options at runtime
     const auto computeCapabilities = huggingface::hardware::cuda::GetCudaComputeCapabilities();
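The change from execConfig(1) to execConfig(/* maxBeamWidth = */ 1) is the standard C++ workaround for the language's lack of named arguments: annotate magic literals with the parameter name so the reader does not have to open the header. The same idiom in a self-contained form, with hypothetical names used only to illustrate it:

#include <string>

// Hypothetical function; exists only to demonstrate the call-site idiom.
void connect(const std::string &host, int port, bool useTls, int timeoutSeconds) {}

int main() {
    // Inline parameter-name comments document each literal at the call site;
    // clang-tidy's bugprone-argument-comment check flags them if they drift
    // from the declaration.
    connect(/* host = */ "db.internal",
            /* port = */ 5432,
            /* useTls = */ true,
            /* timeoutSeconds = */ 30);
    return 0;
}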
@@ -60,7 +61,7 @@ tle::SamplingConfig huggingface::tgi::backends::GetSamplingConfig(
     const float_t temperature,
     const float_t repetition_penalty,
     const float_t frequency_penalty,
-    const uint64_t seed) {
+    const uint64_t seed) noexcept {
     return tle::SamplingConfig(
         1, // TGI only use a single beam
         topK,
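Marking GetSamplingConfig noexcept documents and enforces that assembling a sampling config from plain scalars cannot throw: if an exception did escape, std::terminate() would run. A minimal illustration of the contract, with hypothetical types standing in for the TGI/TensorRT-LLM ones:

#include <cstdint>

// Hypothetical stand-in for a sampling-config value type.
struct SamplingParams {
    uint32_t topK;
    float temperature;
    uint64_t seed;
};

// noexcept is part of the interface: callers and the optimizer may assume no
// exception escapes, which fits a pure factory over trivially copyable fields.
SamplingParams MakeSamplingParams(uint32_t topK, float temperature, uint64_t seed) noexcept {
    return SamplingParams{topK, temperature, seed};
}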