diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs
index c07f0812..ca41f302 100644
--- a/backends/llamacpp/src/backend.rs
+++ b/backends/llamacpp/src/backend.rs
@@ -470,6 +470,7 @@ impl LlamacppBackend {
             for (seq_id, request) in requests.iter().enumerate() {
                 debug!("Request: {:?}", request);
 
+                // TODO remove this
                 let sampler = match LlamacppSampler::new(&request) {
                     Some(sampler) => sampler,
                     _ => {
@@ -506,11 +507,9 @@ impl LlamacppBackend {
                 bindings::llama_decode(llamacpp.ctx, llamacpp.batch)
             };
             if decode != 0 {
-                error!("Failed to decode batch: {decode}");
-
                 if decode == 1 {
                     unsafe {
-                        bindings::llama_kv_cache_clear(llamacpp.ctx); // TODO
+                        bindings::llama_kv_cache_clear(llamacpp.ctx); // TODO: remove this ?
                     }
                 }
                 for seq in seqs.iter_mut() {
@@ -523,6 +522,9 @@ impl LlamacppBackend {
                 bindings::llama_get_kv_cache_used_cells(llamacpp.ctx)
             };
             for seq in seqs.iter_mut() {
+                if !seq.running {
+                    continue;
+                }
                 let (next, logprob) = seq.sampler.sample(&mut llamacpp, seq.batch_pos);
                 seq.n_new_tokens += 1;
                 seq.token = next;
@@ -533,7 +535,7 @@ impl LlamacppBackend {
                         error!("Failed to decode token: {e}");
                         let _ = requests[seq.id].tx.send(Err(InferError::IncompleteGeneration));
                         seq.running = false;
-                        break;
+                        continue;
                     },
                 };
                 let special = vocab.is_special_token(&piece);
@@ -572,7 +574,7 @@ impl LlamacppBackend {
                         queued: requests[seq.id].time,
                     }));
                     seq.running = false;
-                    break;
+                    continue;
                 }
                 let _ = requests[seq.id].tx.send(Ok(InferStreamResponse::Intermediate {
                     token: token,