Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-26 12:32:10 +00:00)
chore(trtllm): define a macro for SizeType cast
parent 7217cafadb
commit d5c8bdc53b
@@ -20,6 +20,9 @@
 using json = nlohmann::json;
 namespace tle = tensorrt_llm::executor;
 
+
+#define CAST_SIZETYPE(x) static_cast<tle::SizeType32>(x)
+
 namespace huggingface::tgi::backends {
     using RequestId = tle::IdType;
     using TokenId = tle::TokenIdType;
@@ -164,10 +164,9 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
 #endif
 
     const auto sampling = GetSamplingConfig(topK, topP, temperature, repetitionPenalty, frequencyPenalty, seed);
-    const auto maxNewTokensChecked_ = static_cast<tle::SizeType32>(maxNewTokensChecked);
 
     // Build the request
-    auto request = tle::Request{tokens, maxNewTokensChecked_, true, sampling, OUTPUT_CONFIG};
+    auto request = tle::Request{tokens, CAST_SIZETYPE(maxNewTokensChecked), true, sampling, OUTPUT_CONFIG};
     request.setStopWords(stopWords);
 
     // Submit to the executor for batching
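
For reference, CAST_SIZETYPE is just shorthand for the explicit static_cast it replaces in Submit. Below is a minimal, self-contained sketch of the before/after cast; the tle::SizeType32 alias, the sample value, and main() are assumptions added so the snippet compiles without the TensorRT-LLM headers, and only the macro definition itself is taken from the diff above.

// Minimal sketch of what CAST_SIZETYPE does. The tle::SizeType32 alias below is
// a stand-in assumption so this compiles without the TensorRT-LLM headers; only
// the macro definition mirrors the commit.
#include <cstdint>
#include <iostream>

namespace tle {
    using SizeType32 = std::int32_t;  // stand-in for tensorrt_llm::executor::SizeType32
}

#define CAST_SIZETYPE(x) static_cast<tle::SizeType32>(x)

int main() {
    const std::uint32_t maxNewTokensChecked = 128;  // hypothetical value

    // Before this commit: the cast was spelled out and stored in a temporary.
    const auto viaStaticCast = static_cast<tle::SizeType32>(maxNewTokensChecked);

    // After this commit: the macro performs the same cast inline at the call site.
    const auto viaMacro = CAST_SIZETYPE(maxNewTokensChecked);

    std::cout << viaStaticCast << " == " << viaMacro << "\n";
    return 0;
}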