Clear request cache after completion

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-02-01 20:20:43 +00:00
parent c8505fb300
commit 8ed362d03a
No known key found for this signature in database

View File

@@ -507,10 +507,10 @@ impl LlamacppBackend {
bindings::llama_decode(llamacpp.ctx, llamacpp.batch) bindings::llama_decode(llamacpp.ctx, llamacpp.batch)
}; };
if decode != 0 { if decode != 0 {
if decode == 1 { warn!("llama_decode failed: kv cache clear + sync");
unsafe { unsafe {
bindings::llama_kv_cache_clear(llamacpp.ctx); // TODO: remove this ? bindings::llama_kv_cache_clear(llamacpp.ctx);
} bindings::llama_synchronize(llamacpp.ctx);
} }
for seq in seqs.iter_mut() { for seq in seqs.iter_mut() {
let _ = requests[seq.id].tx.send(Err(InferError::IncompleteGeneration)); let _ = requests[seq.id].tx.send(Err(InferError::IncompleteGeneration));
@@ -588,6 +588,10 @@ impl LlamacppBackend {
if seq.running { if seq.running {
seq.batch_pos = llamacpp.batch_push(seq.token, seq.pos, seq.id as _, true); seq.batch_pos = llamacpp.batch_push(seq.token, seq.pos, seq.id as _, true);
seq.pos += 1; seq.pos += 1;
} else {
unsafe {
bindings::llama_kv_cache_seq_rm(llamacpp.ctx, seq.id as _, -1, -1);
}
} }
} }
} }