Mirror of https://github.com/huggingface/text-generation-inference.git
Revert "chore(trtllm): remove unused method"
This reverts commit 31747163
This commit is contained in:
parent
8d1c3c8ad4
commit
f5b9ee368a
@@ -79,6 +79,12 @@ namespace huggingface::tgi::backends {
             const std::filesystem::path &executorWorker
         );
 
+        /**
+         * Query the executor for the number of token available for pulling
+         * @return
+         */
+        [[nodiscard]] size_t NumResponsesReady() const;
+
         /**
          * Submit a new generation task to the executor
          * @param tokens
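
The restored declaration is meant to be polled by callers before they try to pull generated tokens out of the executor. Below is a minimal, self-contained sketch of that consumption pattern; MockExecutor and MockBackend are stand-ins invented for illustration and are not types from this diff.

#include <cstddef>
#include <cstdio>

// Stand-in for the TensorRT-LLM executor wrapped by the backend (hypothetical).
struct MockExecutor {
    size_t getNumResponsesReady() const { return 2; }
};

// Stand-in for TensorRtLlmBackend exposing the restored method (hypothetical).
struct MockBackend {
    MockExecutor executor;
    [[nodiscard]] size_t NumResponsesReady() const { return executor.getNumResponsesReady(); }
};

int main() {
    const MockBackend backend{};
    // Poll the count first; only attempt to pull tokens when responses are ready.
    if (const size_t ready = backend.NumResponsesReady(); ready > 0) {
        std::printf("%zu responses ready for pulling\n", ready);
    }
    return 0;
}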
@@ -106,6 +106,17 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
     maxNumTokens = config["/build_config/max_num_tokens"_json_pointer].get<uint32_t>();
 }
 
+[[nodiscard("Returned number of requests needs to be consumed")]]
+size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
+    const auto numResponses = executor.getNumResponsesReady();
+
+#ifndef NDEBUG
+    if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
+#endif
+
+    return numResponses;
+}
+
 [[nodiscard("Returned request id needs to be provided back to gather generated tokens")]]
 tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
     const std::vector<tle::TokenIdType> &tokens,
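
Two details in the restored definition are worth noting: the C++20 [[nodiscard("reason")]] attribute makes the compiler quote the reason string when a caller drops the count, and the #ifndef NDEBUG guard keeps the logging out of release builds. A standalone sketch of both, assuming spdlog (with its bundled fmt) is available as it is in this backend; CountReady is a made-up name, not an API from the diff.

#include <cstddef>
#include <spdlog/spdlog.h>  // assumed dependency, as in the TGI TensorRT-LLM backend

// Sketch only: mirrors the shape of NumResponsesReady() without the executor.
[[nodiscard("Returned number of requests needs to be consumed")]]
size_t CountReady(size_t numResponses) {
#ifndef NDEBUG
    // Compiled only in debug builds; release builds drop this branch entirely.
    if (numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
#endif
    return numResponses;
}

int main() {
    // CountReady(3);                    // would warn: discarded value, quoting the reason above
    const size_t ready = CountReady(3);  // consuming the value satisfies [[nodiscard]]
    return ready > 0 ? 0 : 1;
}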
@@ -122,9 +133,8 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
     {
         const auto &iterations = executor.getLatestIterationStats();
         const auto &lastIteration = iterations.front();
 
         SPDLOG_DEBUG(FMT_EXECUTOR_STATS, fmt::join(tokens, ", "), lastIteration.numActiveRequests);
-
         SPDLOG_DEBUG(FMT_SAMPLING_CONFIG, topK, topP, temperature, repetition_penalty, frequency_penalty, seed);
         SPDLOG_DEBUG(FMT_STRING("Asking for max_new_tokens={:d}"), maxNewTokensChecked);
     }
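
The debug statements above lean on two fmt idioms: fmt::join to render the submitted token ids as a comma-separated list, and FMT_STRING for compile-time checking of the format string. A standalone illustration follows; the token values and the max_new_tokens figure are made up.

#include <cstdint>
#include <vector>
#include <fmt/format.h>
#include <fmt/ranges.h>  // required for fmt::join

int main() {
    const std::vector<int32_t> tokens{1, 15043, 3186};  // made-up token ids
    // fmt::join expands to "1, 15043, 3186"; FMT_STRING validates the format string at compile time.
    fmt::print(FMT_STRING("tokens=[{}] max_new_tokens={:d}\n"),
               fmt::join(tokens, ", "), 128);
    return 0;
}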