mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Cleanup
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
104a968d01
commit
ea28332bb3
@ -212,11 +212,9 @@ impl Llamacpp {
|
|||||||
Ok(Llamacpp{model, ctx, vocab, logprobs, n_ctx, batch})
|
Ok(Llamacpp{model, ctx, vocab, logprobs, n_ctx, batch})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn _batch_clear_logits(&mut self) {
|
fn clear_kv_cache(&mut self, seq_id: bindings::llama_seq_id) {
|
||||||
for n in 0..self.batch.n_tokens as usize{
|
unsafe {
|
||||||
unsafe {
|
bindings::llama_kv_cache_seq_rm(self.ctx, seq_id, -1, -1);
|
||||||
*self.batch.logits.add(n) = 0 as i8;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -473,11 +471,8 @@ impl LlamacppBackend {
|
|||||||
bindings::llama_decode(llamacpp.ctx, llamacpp.batch)
|
bindings::llama_decode(llamacpp.ctx, llamacpp.batch)
|
||||||
};
|
};
|
||||||
if decode != 0 {
|
if decode != 0 {
|
||||||
warn!("llama_decode failed: kv cache clear + sync");
|
warn!("llama_decode failed, clearing kv cache");
|
||||||
unsafe {
|
llamacpp.clear_kv_cache(-1);
|
||||||
bindings::llama_kv_cache_clear(llamacpp.ctx);
|
|
||||||
bindings::llama_synchronize(llamacpp.ctx);
|
|
||||||
}
|
|
||||||
for seq in seqs.iter_mut() {
|
for seq in seqs.iter_mut() {
|
||||||
let _ = requests[seq.id].tx.send(Err(InferError::IncompleteGeneration));
|
let _ = requests[seq.id].tx.send(Err(InferError::IncompleteGeneration));
|
||||||
seq.running = false;
|
seq.running = false;
|
||||||
@ -555,9 +550,7 @@ impl LlamacppBackend {
|
|||||||
seq.batch_pos = llamacpp.batch_push(seq.token, seq.pos, seq.id as _, true);
|
seq.batch_pos = llamacpp.batch_push(seq.token, seq.pos, seq.id as _, true);
|
||||||
seq.pos += 1;
|
seq.pos += 1;
|
||||||
} else {
|
} else {
|
||||||
unsafe {
|
llamacpp.clear_kv_cache(seq.id as _);
|
||||||
bindings::llama_kv_cache_seq_rm(llamacpp.ctx, seq.id as _, -1, -1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user