diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs index 81af644a..2a8c4c53 100644 --- a/backends/v3/src/queue.rs +++ b/backends/v3/src/queue.rs @@ -291,11 +291,7 @@ impl State { None } Some(block_allocator) => { - if entry.request.input_length <= prefill_token_budget { - prefill_tokens += entry.request.input_length; - } else { - prefill_tokens = prefill_token_budget; - } + prefill_tokens += entry.request.input_length; let max_new_tokens = match self.window_size { None => entry.request.stopping_parameters.max_new_tokens, Some(window_size) => min(