Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-19 15:52:08 +00:00)
Cleanup

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

commit f22e2fb550
parent 0f62401b8e
@@ -290,6 +290,12 @@ impl Llamacpp {
         Ok(Llamacpp{model, ctx, vocab, logprobs, batch})
     }
 
+    fn decode(&mut self) -> i32 {
+        unsafe {
+            llamacpp::decode(self.ctx, self.batch)
+        }
+    }
+
     fn clear_kv_cache(&mut self, seq_id: llamacpp::llama_seq_id) {
         unsafe {
             llamacpp::kv_cache_seq_rm(self.ctx, seq_id, -1, -1);
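The first hunk extracts the raw FFI call into a decode() helper on Llamacpp, so the unsafe block lives in one place and call sites stay free of inline unsafe. Below is a minimal self-contained sketch of the same pattern; the ffi module, the Backend struct, and its field types are hypothetical stand-ins for the generated llamacpp bindings, not the real TGI code.

// Hypothetical stand-ins for the generated llamacpp bindings.
mod ffi {
    pub type LlamaContext = usize;
    pub type LlamaBatch = usize;

    // Returns 0 on success, non-zero on failure (llama.cpp convention).
    pub unsafe fn decode(_ctx: LlamaContext, _batch: LlamaBatch) -> i32 {
        0
    }
}

struct Backend {
    ctx: ffi::LlamaContext,
    batch: ffi::LlamaBatch,
}

impl Backend {
    // Safe wrapper: the single place that touches the unsafe FFI call.
    fn decode(&mut self) -> i32 {
        unsafe { ffi::decode(self.ctx, self.batch) }
    }
}

fn main() {
    let mut backend = Backend { ctx: 0, batch: 0 };
    assert_eq!(backend.decode(), 0);
}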
@@ -543,14 +549,8 @@ impl LlamacppBackend {
                     running: true,
                 });
             }
-            loop {
-                if llamacpp.batch.n_tokens == 0 {
-                    break;
-                }
-                let decode = unsafe {
-                    llamacpp::decode(llamacpp.ctx, llamacpp.batch)
-                };
-                if decode != 0 {
+            while llamacpp.batch.n_tokens > 0 {
+                if llamacpp.decode() != 0 {
                     warn!("llama_decode failed, clearing kv cache");
                     llamacpp.clear_kv_cache(-1);
                     for seq in seqs.iter_mut() {
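The second hunk is then a pure control-flow cleanup: a loop whose first statement breaks on the guard condition is equivalent to a while on the negated guard, and the new helper replaces the inline unsafe block. A runnable sketch of that equivalence follows; decode_stub, run_before, and run_after are hypothetical (in the real code the failure path clears the kv cache and then walks seqs, as the hunk shows).

// Hypothetical stub standing in for the decode() helper;
// returns 0 on success, non-zero on failure.
fn decode_stub(tokens: &mut u32) -> i32 {
    *tokens = tokens.saturating_sub(4); // pretend one batch chunk was consumed
    0
}

// Before: a loop that breaks on the guard, binding the result first.
fn run_before(mut tokens: u32) {
    loop {
        if tokens == 0 {
            break;
        }
        let decode = decode_stub(&mut tokens);
        if decode != 0 {
            eprintln!("decode failed"); // the real code recovers instead of breaking
            break;
        }
    }
}

// After: a while on the negated guard, calling the helper directly.
fn run_after(mut tokens: u32) {
    while tokens > 0 {
        if decode_stub(&mut tokens) != 0 {
            eprintln!("decode failed");
            break;
        }
    }
}

fn main() {
    run_before(10);
    run_after(10);
}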