From 9ac26ed717027df5bb668d4d4abce3632678fc0e Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz <funtowiczmo@gmail.com>
Date: Mon, 21 Oct 2024 16:57:46 +0200
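Subject: [PATCH] feat(post_processing): max_num_tokens is now a const generic parameter

`post_processor_looper` no longer takes `max_num_tokens` as a runtime
argument; the initial capacity of each token buffer is now supplied through
the `MAX_NUM_TOKENS` const generic parameter. The call site instantiates it
with 256, where it previously passed 512.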

---
 backends/trtllm/src/looper.rs | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/backends/trtllm/src/looper.rs b/backends/trtllm/src/looper.rs
index ce9a7728..e26155c1 100644
--- a/backends/trtllm/src/looper.rs
+++ b/backends/trtllm/src/looper.rs
@@ -159,9 +159,8 @@ fn executor_status_looper(
     }
 }
 
-fn post_processor_looper(
+fn post_processor_looper<const MAX_NUM_TOKENS: usize>(
     tokenizer: Tokenizer,
-    max_num_tokens: usize,
     max_inflight_requests: usize,
     mut decoded_tokens: UnboundedReceiver<(u64, InferResult<DecodedTokenContext>)>,
 ) {
@@ -180,7 +179,7 @@ fn post_processor_looper(
                         .entry(request_id)
                         .and_modify(|s| s.push(*&ctx.token.id))
                         .or_insert_with(|| {
-                            let mut state = Vec::with_capacity(max_num_tokens);
+                            let mut state = Vec::with_capacity(MAX_NUM_TOKENS);
                             state.push(*&ctx.token.id);
                             state
                         });
@@ -314,12 +313,7 @@ impl TensorRtLlmBackendV2 {
 
         // Post processor looper is responsible from receiving a bunch of tokens, decoding them and sending them back to the user
         let post_processor_looper = spawn_blocking(move || {
-            post_processor_looper(
-                tokenizer,
-                512,
-                max_inflight_requests,
-                post_processor_receiver,
-            )
+            post_processor_looper::<256>(tokenizer, max_inflight_requests, post_processor_receiver)
         });
 
         Ok(TensorRtLlmBackendV2 {