Mirror of https://github.com/huggingface/text-generation-inference.git
Revert "chore(trtllm): remove unused method"
This reverts commit 31747163
This commit is contained in:
parent
8d1c3c8ad4
commit
f5b9ee368a
@@ -79,6 +79,12 @@ namespace huggingface::tgi::backends {
             const std::filesystem::path &executorWorker
         );
 
+        /**
+         * Query the executor for the number of token available for pulling
+         * @return
+         */
+        [[nodiscard]] size_t NumResponsesReady() const;
+
         /**
          * Submit a new generation task to the executor
          * @param tokens
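
The restored declaration is meant to be polled by callers before they try to pull generated tokens out of the executor. Below is a minimal, self-contained sketch of that consumption pattern; MockExecutor and MockBackend are stand-ins invented for illustration and are not types from this diff.

#include <cstddef>
#include <cstdio>

// Stand-in for the TensorRT-LLM executor wrapped by the backend (hypothetical).
struct MockExecutor {
    size_t getNumResponsesReady() const { return 2; }
};

// Stand-in for TensorRtLlmBackend exposing the restored method (hypothetical).
struct MockBackend {
    MockExecutor executor;
    [[nodiscard]] size_t NumResponsesReady() const { return executor.getNumResponsesReady(); }
};

int main() {
    const MockBackend backend{};
    // Poll the count first; only attempt to pull tokens when responses are ready.
    if (const size_t ready = backend.NumResponsesReady(); ready > 0) {
        std::printf("%zu responses ready for pulling\n", ready);
    }
    return 0;
}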
@@ -106,6 +106,17 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
     maxNumTokens = config["/build_config/max_num_tokens"_json_pointer].get<uint32_t>();
 }
 
+[[nodiscard("Returned number of requests needs to be consumed")]]
+size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
+    const auto numResponses = executor.getNumResponsesReady();
+
+#ifndef NDEBUG
+    if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
+#endif
+
+    return numResponses;
+}
+
 [[nodiscard("Returned request id needs to be provided back to gather generated tokens")]]
 tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
     const std::vector<tle::TokenIdType> &tokens,
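
Two details in the restored definition are worth noting: the C++20 [[nodiscard("reason")]] attribute makes the compiler quote the reason string when a caller drops the count, and the #ifndef NDEBUG guard keeps the logging out of release builds. A standalone sketch of both, assuming spdlog (with its bundled fmt) is available as it is in this backend; CountReady is a made-up name, not an API from the diff.

#include <cstddef>
#include <spdlog/spdlog.h>  // assumed dependency, as in the TGI TensorRT-LLM backend

// Sketch only: mirrors the shape of NumResponsesReady() without the executor.
[[nodiscard("Returned number of requests needs to be consumed")]]
size_t CountReady(size_t numResponses) {
#ifndef NDEBUG
    // Compiled only in debug builds; release builds drop this branch entirely.
    if (numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
#endif
    return numResponses;
}

int main() {
    // CountReady(3);                    // would warn: discarded value, quoting the reason above
    const size_t ready = CountReady(3);  // consuming the value satisfies [[nodiscard]]
    return ready > 0 ? 0 : 1;
}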
@@ -122,9 +133,8 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
     {
         const auto &iterations = executor.getLatestIterationStats();
         const auto &lastIteration = iterations.front();
 
         SPDLOG_DEBUG(FMT_EXECUTOR_STATS, fmt::join(tokens, ", "), lastIteration.numActiveRequests);
-
         SPDLOG_DEBUG(FMT_SAMPLING_CONFIG, topK, topP, temperature, repetition_penalty, frequency_penalty, seed);
         SPDLOG_DEBUG(FMT_STRING("Asking for max_new_tokens={:d}"), maxNewTokensChecked);
     }
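
The debug statements above lean on two fmt idioms: fmt::join to render the submitted token ids as a comma-separated list, and FMT_STRING for compile-time checking of the format string. A standalone illustration follows; the token values and the max_new_tokens figure are made up.

#include <cstdint>
#include <vector>
#include <fmt/format.h>
#include <fmt/ranges.h>  // required for fmt::join

int main() {
    const std::vector<int32_t> tokens{1, 15043, 3186};  // made-up token ids
    // fmt::join expands to "1, 15043, 3186"; FMT_STRING validates the format string at compile time.
    fmt::print(FMT_STRING("tokens=[{}] max_new_tokens={:d}\n"),
               fmt::join(tokens, ", "), 128);
    return 0;
}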