From d883109df68529d971d54aa648df0c3d3fca6e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Mon, 3 Feb 2025 20:58:33 +0000 Subject: [PATCH] Disable graceful shutdown in debug mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Adrien Gallouët --- backends/llamacpp/src/backend.rs | 8 ++++++-- backends/llamacpp/src/main.rs | 12 ++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs index 04160cc4..870798e7 100644 --- a/backends/llamacpp/src/backend.rs +++ b/backends/llamacpp/src/backend.rs @@ -441,7 +441,7 @@ impl LlamacppBackend { pub fn new( conf: LlamacppConfig, tokenizer: Tokenizer, - ) -> (Self, oneshot::Receiver>) { + ) -> (Self, oneshot::Receiver>, watch::Sender) { // Setup llama & export logs, once and for all INIT.call_once(|| unsafe { @@ -457,6 +457,7 @@ impl LlamacppBackend { }); let (status_tx, status_rx) = watch::channel(false); + let (shutdown_tx, shutdown_rx) = watch::channel(false); let (ok_tx, ok_rx) = oneshot::channel(); let (tx, mut rx) = unbounded_channel::(); let (sync_tx, sync_rx) = mpsc::channel(); @@ -509,6 +510,9 @@ impl LlamacppBackend { let _ = status_tx.send(true); while let Ok(requests) = sync_rx.recv() { + if shutdown_rx.borrow().clone() { + break; + } let start_time = Instant::now(); let mut seqs: Vec = Vec::with_capacity(requests.len()); llamacpp.batch.n_tokens = 0; @@ -637,7 +641,7 @@ impl LlamacppBackend { } } }); - (Self{tx, status: status_rx}, ok_rx) + (Self{tx, status: status_rx}, ok_rx, shutdown_tx) } } diff --git a/backends/llamacpp/src/main.rs b/backends/llamacpp/src/main.rs index dba391c0..1b8c4c5d 100644 --- a/backends/llamacpp/src/main.rs +++ b/backends/llamacpp/src/main.rs @@ -6,7 +6,7 @@ use text_generation_router::{logging, server, usage_stats}; use thiserror::Error; use tokenizers::{Tokenizer, FromPretrainedParameters}; use tokio::sync::oneshot::error::RecvError; -use tracing::error; +use tracing::{warn, error}; /// Backend Configuration #[derive(Parser, Debug)] @@ -221,7 +221,7 @@ async fn main() -> Result<(), RouterError> { )? }; - let (backend, ok) = LlamacppBackend::new( + let (backend, ok, shutdown) = LlamacppBackend::new( LlamacppConfig { model_gguf: args.model_gguf, n_ctx: args.n_ctx, @@ -246,6 +246,14 @@ async fn main() -> Result<(), RouterError> { ); ok.await??; + if cfg!(debug_assertions) { + warn!("Graceful shutdown disabled!"); + let _ = tokio::task::spawn(async move { + let _ = tokio::signal::ctrl_c().await; + let _ = shutdown.send(true); + }); + } + server::run( backend, args.max_concurrent_requests,