diff --git a/backends/trtllm/include/backend.h b/backends/trtllm/include/backend.h
index cbfaacf1..abba906e 100644
--- a/backends/trtllm/include/backend.h
+++ b/backends/trtllm/include/backend.h
@@ -20,6 +20,9 @@
 using json = nlohmann::json;
 namespace tle = tensorrt_llm::executor;
+
+#define CAST_SIZETYPE(x) static_cast<tle::SizeType32>(x)
+
 namespace huggingface::tgi::backends {
     using RequestId = tle::IdType;
     using TokenId = tle::TokenIdType;
diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp
index a9d37bc1..ee8171bc 100644
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@@ -164,10 +164,9 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
 #endif
     const auto sampling = GetSamplingConfig(topK, topP, temperature, repetitionPenalty, frequencyPenalty, seed);
-    const auto maxNewTokensChecked_ = static_cast<tle::SizeType32>(maxNewTokensChecked);
 
     // Build the request
-    auto request = tle::Request{tokens, maxNewTokensChecked_, true, sampling, OUTPUT_CONFIG};
+    auto request = tle::Request{tokens, CAST_SIZETYPE(maxNewTokensChecked), true, sampling, OUTPUT_CONFIG};
     request.setStopWords(stopWords);
 
     // Submit to the executor for batching
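
Note (not part of the patch): a minimal, self-contained sketch of what the new CAST_SIZETYPE macro does. The tle::SizeType32 alias below is a local stand-in for tensorrt_llm::executor::SizeType32 (a 32-bit integer in the executor API); the value 128 and the main() harness are illustrative only.

#include <cstdint>

// Stand-in for the real alias provided by tensorrt_llm::executor.
namespace tle { using SizeType32 = std::int32_t; }

// Same definition as in the patch: an explicit narrowing cast to the
// executor's 32-bit size type, spelled once instead of at each call site.
#define CAST_SIZETYPE(x) static_cast<tle::SizeType32>(x)

int main() {
    const std::uint64_t maxNewTokensChecked = 128;
    // Replaces the ad-hoc local `maxNewTokensChecked_` removed in backend.cpp.
    const auto maxTokens = CAST_SIZETYPE(maxNewTokensChecked);
    return maxTokens == 128 ? 0 : 1;
}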