mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Remove warmup
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
8ed362d03a
commit
104a968d01
@ -238,38 +238,6 @@ impl Llamacpp {
|
||||
self.batch.n_tokens += 1;
|
||||
n
|
||||
}
|
||||
|
||||
// TODO(review): is this warmup pass actually needed?
|
||||
fn warmup(&self) {
|
||||
let mut buf: Vec<bindings::llama_token> = Vec::new();
|
||||
|
||||
let bos = unsafe {
|
||||
bindings::llama_vocab_bos(self.vocab)
|
||||
};
|
||||
if bos != bindings::LLAMA_TOKEN_NULL {
|
||||
buf.push(bos);
|
||||
}
|
||||
let eos = unsafe {
|
||||
bindings::llama_vocab_eos(self.vocab)
|
||||
};
|
||||
if eos != bindings::LLAMA_TOKEN_NULL {
|
||||
buf.push(eos);
|
||||
}
|
||||
if buf.is_empty() {
|
||||
warn!("Warmup failed: no bos/eos...");
|
||||
return;
|
||||
}
|
||||
let batch = unsafe {
|
||||
bindings::llama_batch_get_one(buf.as_ptr() as _, buf.len() as _)
|
||||
};
|
||||
if unsafe { bindings::llama_decode(self.ctx, batch) } != 0 {
|
||||
error!("Warmup failed: llama_decode() returned an error");
|
||||
}
|
||||
unsafe {
|
||||
bindings::llama_kv_cache_clear(self.ctx);
|
||||
bindings::llama_synchronize(self.ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Llamacpp {
|
||||
@ -456,8 +424,6 @@ impl LlamacppBackend {
|
||||
Ok(v) => { let _ = ok_tx.send(Ok(())); v },
|
||||
Err(e) => { let _ = ok_tx.send(Err(e)); return; },
|
||||
};
|
||||
llamacpp.warmup();
|
||||
|
||||
let vocab = tokenizer.get_added_vocabulary();
|
||||
|
||||
// health() returns true
|
||||
|
Loading…
Reference in New Issue
Block a user