mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 16:32:12 +00:00
feat(post_processing): max_num_tokens is now a const generic parameter
This commit is contained in:
parent
cdac4b0058
commit
9ac26ed717
@ -159,9 +159,8 @@ fn executor_status_looper(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn post_processor_looper(
|
fn post_processor_looper<const MAX_NUM_TOKENS: usize>(
|
||||||
tokenizer: Tokenizer,
|
tokenizer: Tokenizer,
|
||||||
max_num_tokens: usize,
|
|
||||||
max_inflight_requests: usize,
|
max_inflight_requests: usize,
|
||||||
mut decoded_tokens: UnboundedReceiver<(u64, InferResult<DecodedTokenContext>)>,
|
mut decoded_tokens: UnboundedReceiver<(u64, InferResult<DecodedTokenContext>)>,
|
||||||
) {
|
) {
|
||||||
@ -180,7 +179,7 @@ fn post_processor_looper(
|
|||||||
.entry(request_id)
|
.entry(request_id)
|
||||||
.and_modify(|s| s.push(*&ctx.token.id))
|
.and_modify(|s| s.push(*&ctx.token.id))
|
||||||
.or_insert_with(|| {
|
.or_insert_with(|| {
|
||||||
let mut state = Vec::with_capacity(max_num_tokens);
|
let mut state = Vec::with_capacity(MAX_NUM_TOKENS);
|
||||||
state.push(*&ctx.token.id);
|
state.push(*&ctx.token.id);
|
||||||
state
|
state
|
||||||
});
|
});
|
||||||
@ -314,12 +313,7 @@ impl TensorRtLlmBackendV2 {
|
|||||||
|
|
||||||
// Post processor looper is responsible for receiving a bunch of tokens, decoding them and sending them back to the user
|
// Post processor looper is responsible for receiving a bunch of tokens, decoding them and sending them back to the user
|
||||||
let post_processor_looper = spawn_blocking(move || {
|
let post_processor_looper = spawn_blocking(move || {
|
||||||
post_processor_looper(
|
post_processor_looper::<256>(tokenizer, max_inflight_requests, post_processor_receiver)
|
||||||
tokenizer,
|
|
||||||
512,
|
|
||||||
max_inflight_requests,
|
|
||||||
post_processor_receiver,
|
|
||||||
)
|
|
||||||
});
|
});
|
||||||
|
|
||||||
Ok(TensorRtLlmBackendV2 {
|
Ok(TensorRtLlmBackendV2 {
|
||||||
|
Loading…
Reference in New Issue
Block a user