mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Only export the latest logits
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
960c12bd6e
commit
161280f313
@@ -474,19 +474,21 @@ impl LlamacppBackend {
             continue;
         },
     };
+    let last_pos = request.input_ids.len() - 1;
+
     for (pos, &token_id) in request.input_ids.iter().enumerate() {
         llamacpp.batch_push(
             token_id as bindings::llama_token,
             pos as bindings::llama_pos,
             seq_id as bindings::llama_seq_id,
-            true, // TODO
+            pos == last_pos, // check samplers
         );
     }
     seqs.push(LlamacppSeq {
         id: seq_id,
         batch_pos: llamacpp.batch.n_tokens as usize - 1,
         token: -1,
-        pos: request.input_ids.len() as _,
+        pos: last_pos as bindings::llama_pos + 1,
         sampler: sampler,
         text: String::with_capacity(1024),
         n_new_tokens: 0,
Loading…
Reference in New Issue
Block a user