mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Fix batching
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
2a51e415ff
commit
96434a1e7e
@ -429,12 +429,15 @@ impl LlamacppBackend {
|
||||
requests = Vec::new();
|
||||
continue;
|
||||
}
|
||||
if n_tokens + request.input_ids.len() > conf.max_batch_total_tokens as usize {
|
||||
let n_tokens_to_add = request.input_ids.len();
|
||||
|
||||
if n_tokens + n_tokens_to_add > conf.max_batch_total_tokens as usize {
|
||||
let _ = sync_tx.send(requests);
|
||||
n_tokens = request.input_ids.len();
|
||||
n_tokens = n_tokens_to_add;
|
||||
requests = vec![request];
|
||||
continue;
|
||||
}
|
||||
n_tokens += n_tokens_to_add;
|
||||
requests.push(request);
|
||||
},
|
||||
Err(_) => {
|
||||
@ -487,7 +490,7 @@ impl LlamacppBackend {
|
||||
seqs.push(LlamacppSeq {
|
||||
id: seq_id,
|
||||
batch_pos: llamacpp.batch.n_tokens as usize - 1,
|
||||
token: -1,
|
||||
token: bindings::LLAMA_TOKEN_NULL,
|
||||
pos: last_pos as bindings::llama_pos + 1,
|
||||
sampler: sampler,
|
||||
text: String::with_capacity(1024),
|
||||
|
Loading…
Reference in New Issue
Block a user