mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Disable graceful shutdown in debug mode
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
207041a977
commit
d883109df6
@@ -441,7 +441,7 @@ impl LlamacppBackend {
     pub fn new(
         conf: LlamacppConfig,
         tokenizer: Tokenizer,
-    ) -> (Self, oneshot::Receiver<Result<(),BackendError>>) {
+    ) -> (Self, oneshot::Receiver<Result<(),BackendError>>, watch::Sender<bool>) {

         // Setup llama & export logs, once and for all
         INIT.call_once(|| unsafe {
@@ -457,6 +457,7 @@ impl LlamacppBackend {
         });

         let (status_tx, status_rx) = watch::channel(false);
+        let (shutdown_tx, shutdown_rx) = watch::channel(false);
         let (ok_tx, ok_rx) = oneshot::channel();
         let (tx, mut rx) = unbounded_channel::<LlamacppRequest>();
         let (sync_tx, sync_rx) = mpsc::channel();
@@ -509,6 +510,9 @@ impl LlamacppBackend {
             let _ = status_tx.send(true);

             while let Ok(requests) = sync_rx.recv() {
+                if shutdown_rx.borrow().clone() {
+                    break;
+                }
                 let start_time = Instant::now();
                 let mut seqs: Vec<LlamacppSeq> = Vec::with_capacity(requests.len());
                 llamacpp.batch.n_tokens = 0;
@@ -637,7 +641,7 @@ impl LlamacppBackend {
                 }
             }
         });
-        (Self{tx, status: status_rx}, ok_rx)
+        (Self{tx, status: status_rx}, ok_rx, shutdown_tx)
     }
 }

@@ -6,7 +6,7 @@ use text_generation_router::{logging, server, usage_stats};
 use thiserror::Error;
 use tokenizers::{Tokenizer, FromPretrainedParameters};
 use tokio::sync::oneshot::error::RecvError;
-use tracing::error;
+use tracing::{warn, error};

 /// Backend Configuration
 #[derive(Parser, Debug)]
@@ -221,7 +221,7 @@ async fn main() -> Result<(), RouterError> {
         )?
     };

-    let (backend, ok) = LlamacppBackend::new(
+    let (backend, ok, shutdown) = LlamacppBackend::new(
         LlamacppConfig {
             model_gguf: args.model_gguf,
             n_ctx: args.n_ctx,
@@ -246,6 +246,14 @@ async fn main() -> Result<(), RouterError> {
     );
     ok.await??;

+    if cfg!(debug_assertions) {
+        warn!("Graceful shutdown disabled!");
+        let _ = tokio::task::spawn(async move {
+            let _ = tokio::signal::ctrl_c().await;
+            let _ = shutdown.send(true);
+        });
+    }
+
     server::run(
         backend,
         args.max_concurrent_requests,
Loading…
Reference in New Issue
Block a user