Fix batching

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-02-01 16:09:51 +00:00
parent 2a51e415ff
commit 96434a1e7e
No known key found for this signature in database

View File

@@ -429,12 +429,15 @@ impl LlamacppBackend {
requests = Vec::new();
continue;
}
if n_tokens + request.input_ids.len() > conf.max_batch_total_tokens as usize {
let n_tokens_to_add = request.input_ids.len();
if n_tokens + n_tokens_to_add > conf.max_batch_total_tokens as usize {
let _ = sync_tx.send(requests);
n_tokens = request.input_ids.len();
n_tokens = n_tokens_to_add;
requests = vec![request];
continue;
}
n_tokens += n_tokens_to_add;
requests.push(request);
},
Err(_) => {
@@ -487,7 +490,7 @@ impl LlamacppBackend {
seqs.push(LlamacppSeq {
id: seq_id,
batch_pos: llamacpp.batch.n_tokens as usize - 1,
token: -1,
token: bindings::LLAMA_TOKEN_NULL,
pos: last_pos as bindings::llama_pos + 1,
sampler: sampler,
text: String::with_capacity(1024),