Disable graceful shutdown in debug mode

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-02-03 20:58:33 +00:00
parent 207041a977
commit d883109df6
No known key found for this signature in database
2 changed files with 16 additions and 4 deletions

View File

@@ -441,7 +441,7 @@ impl LlamacppBackend {
pub fn new(
conf: LlamacppConfig,
tokenizer: Tokenizer,
) -> (Self, oneshot::Receiver<Result<(),BackendError>>) {
) -> (Self, oneshot::Receiver<Result<(),BackendError>>, watch::Sender<bool>) {
// Setup llama & export logs, once and for all
INIT.call_once(|| unsafe {
@@ -457,6 +457,7 @@ impl LlamacppBackend {
});
let (status_tx, status_rx) = watch::channel(false);
let (shutdown_tx, shutdown_rx) = watch::channel(false);
let (ok_tx, ok_rx) = oneshot::channel();
let (tx, mut rx) = unbounded_channel::<LlamacppRequest>();
let (sync_tx, sync_rx) = mpsc::channel();
@@ -509,6 +510,9 @@ impl LlamacppBackend {
let _ = status_tx.send(true);
while let Ok(requests) = sync_rx.recv() {
if shutdown_rx.borrow().clone() {
break;
}
let start_time = Instant::now();
let mut seqs: Vec<LlamacppSeq> = Vec::with_capacity(requests.len());
llamacpp.batch.n_tokens = 0;
@@ -637,7 +641,7 @@ impl LlamacppBackend {
}
}
});
(Self{tx, status: status_rx}, ok_rx)
(Self{tx, status: status_rx}, ok_rx, shutdown_tx)
}
}

View File

@@ -6,7 +6,7 @@ use text_generation_router::{logging, server, usage_stats};
use thiserror::Error;
use tokenizers::{Tokenizer, FromPretrainedParameters};
use tokio::sync::oneshot::error::RecvError;
use tracing::error;
use tracing::{warn, error};
/// Backend Configuration
#[derive(Parser, Debug)]
@@ -221,7 +221,7 @@ async fn main() -> Result<(), RouterError> {
)?
};
let (backend, ok) = LlamacppBackend::new(
let (backend, ok, shutdown) = LlamacppBackend::new(
LlamacppConfig {
model_gguf: args.model_gguf,
n_ctx: args.n_ctx,
@@ -246,6 +246,14 @@ async fn main() -> Result<(), RouterError> {
);
ok.await??;
if cfg!(debug_assertions) {
warn!("Graceful shutdown disabled!");
let _ = tokio::task::spawn(async move {
let _ = tokio::signal::ctrl_c().await;
let _ = shutdown.send(true);
});
}
server::run(
backend,
args.max_concurrent_requests,