Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-19 15:52:08 +00:00)
Fix batching
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Parent: 2a51e415ff
Commit: 96434a1e7e
@ -429,12 +429,15 @@ impl LlamacppBackend {
|
|||||||
requests = Vec::new();
|
requests = Vec::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if n_tokens + request.input_ids.len() > conf.max_batch_total_tokens as usize {
|
let n_tokens_to_add = request.input_ids.len();
|
||||||
|
|
||||||
|
if n_tokens + n_tokens_to_add > conf.max_batch_total_tokens as usize {
|
||||||
let _ = sync_tx.send(requests);
|
let _ = sync_tx.send(requests);
|
||||||
n_tokens = request.input_ids.len();
|
n_tokens = n_tokens_to_add;
|
||||||
requests = vec![request];
|
requests = vec![request];
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
n_tokens += n_tokens_to_add;
|
||||||
requests.push(request);
|
requests.push(request);
|
||||||
},
|
},
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
@ -487,7 +490,7 @@ impl LlamacppBackend {
|
|||||||
seqs.push(LlamacppSeq {
|
seqs.push(LlamacppSeq {
|
||||||
id: seq_id,
|
id: seq_id,
|
||||||
batch_pos: llamacpp.batch.n_tokens as usize - 1,
|
batch_pos: llamacpp.batch.n_tokens as usize - 1,
|
||||||
token: -1,
|
token: bindings::LLAMA_TOKEN_NULL,
|
||||||
pos: last_pos as bindings::llama_pos + 1,
|
pos: last_pos as bindings::llama_pos + 1,
|
||||||
sampler: sampler,
|
sampler: sampler,
|
||||||
text: String::with_capacity(1024),
|
text: String::with_capacity(1024),
|
||||||
|
Loading…
Reference in New Issue
Block a user