feat(post_processing): max_num_tokens is now a const generic parameter

2025-07-13 03:10:17 +00:00 · 2024-10-21 16:57:46 +02:00 · 2024-10-21 16:57:46 +02:00 · 9ac26ed717
commit 9ac26ed717
parent cdac4b0058
1 changed files with 3 additions and 9 deletions
--- a/backends/trtllm/src/looper.rs
+++ b/backends/trtllm/src/looper.rs
@@ -159,9 +159,8 @@ fn executor_status_looper(
    }
 }

-fn post_processor_looper(
+fn post_processor_looper<const MAX_NUM_TOKENS: usize>(
    tokenizer: Tokenizer,
-    max_num_tokens: usize,
    max_inflight_requests: usize,
    mut decoded_tokens: UnboundedReceiver<(u64, InferResult<DecodedTokenContext>)>,
 ) {
@@ -180,7 +179,7 @@ fn post_processor_looper(
                        .entry(request_id)
                        .and_modify(|s| s.push(*&ctx.token.id))
                        .or_insert_with(|| {
-                            let mut state = Vec::with_capacity(max_num_tokens);
+                            let mut state = Vec::with_capacity(MAX_NUM_TOKENS);
                            state.push(*&ctx.token.id);
                            state
                        });
@@ -314,12 +313,7 @@ impl TensorRtLlmBackendV2 {

        // Post processor looper is responsible from receiving a bunch of tokens, decoding them and sending them back to the user
        let post_processor_looper = spawn_blocking(move || {
-            post_processor_looper(
-                tokenizer,
-                512,
-                max_inflight_requests,
-                post_processor_receiver,
-            )
+            post_processor_looper::<256>(tokenizer, max_inflight_requests, post_processor_receiver)
        });

        Ok(TensorRtLlmBackendV2 {