mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Only export the latest logits
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
960c12bd6e
commit
161280f313
@ -474,19 +474,21 @@ impl LlamacppBackend {
|
||||
continue;
|
||||
},
|
||||
};
|
||||
let last_pos = request.input_ids.len() - 1;
|
||||
|
||||
for (pos, &token_id) in request.input_ids.iter().enumerate() {
|
||||
llamacpp.batch_push(
|
||||
token_id as bindings::llama_token,
|
||||
pos as bindings::llama_pos,
|
||||
seq_id as bindings::llama_seq_id,
|
||||
true, // TODO
|
||||
pos == last_pos, // check samplers
|
||||
);
|
||||
}
|
||||
seqs.push(LlamacppSeq {
|
||||
id: seq_id,
|
||||
batch_pos: llamacpp.batch.n_tokens as usize - 1,
|
||||
token: -1,
|
||||
pos: request.input_ids.len() as _,
|
||||
pos: last_pos as bindings::llama_pos + 1,
|
||||
sampler: sampler,
|
||||
text: String::with_capacity(1024),
|
||||
n_new_tokens: 0,
|
||||
|
Loading…
Reference in New Issue
Block a user