mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Disable graceful shutdown in debug mode
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
207041a977
commit
d883109df6
@@ -441,7 +441,7 @@ impl LlamacppBackend {
     pub fn new(
         conf: LlamacppConfig,
         tokenizer: Tokenizer,
-    ) -> (Self, oneshot::Receiver<Result<(),BackendError>>) {
+    ) -> (Self, oneshot::Receiver<Result<(),BackendError>>, watch::Sender<bool>) {

         // Setup llama & export logs, once and for all
         INIT.call_once(|| unsafe {
@@ -457,6 +457,7 @@ impl LlamacppBackend {
         });

         let (status_tx, status_rx) = watch::channel(false);
+        let (shutdown_tx, shutdown_rx) = watch::channel(false);
         let (ok_tx, ok_rx) = oneshot::channel();
         let (tx, mut rx) = unbounded_channel::<LlamacppRequest>();
         let (sync_tx, sync_rx) = mpsc::channel();
@@ -509,6 +510,9 @@ impl LlamacppBackend {
             let _ = status_tx.send(true);

             while let Ok(requests) = sync_rx.recv() {
+                if shutdown_rx.borrow().clone() {
+                    break;
+                }
                 let start_time = Instant::now();
                 let mut seqs: Vec<LlamacppSeq> = Vec::with_capacity(requests.len());
                 llamacpp.batch.n_tokens = 0;
@@ -637,7 +641,7 @@ impl LlamacppBackend {
                 }
             }
         });
-        (Self{tx, status: status_rx}, ok_rx)
+        (Self{tx, status: status_rx}, ok_rx, shutdown_tx)
     }
 }

@@ -6,7 +6,7 @@ use text_generation_router::{logging, server, usage_stats};
 use thiserror::Error;
 use tokenizers::{Tokenizer, FromPretrainedParameters};
 use tokio::sync::oneshot::error::RecvError;
-use tracing::error;
+use tracing::{warn, error};

 /// Backend Configuration
 #[derive(Parser, Debug)]
@@ -221,7 +221,7 @@ async fn main() -> Result<(), RouterError> {
         )?
     };

-    let (backend, ok) = LlamacppBackend::new(
+    let (backend, ok, shutdown) = LlamacppBackend::new(
         LlamacppConfig {
             model_gguf: args.model_gguf,
             n_ctx: args.n_ctx,
@@ -246,6 +246,14 @@ async fn main() -> Result<(), RouterError> {
     );
     ok.await??;

+    if cfg!(debug_assertions) {
+        warn!("Graceful shutdown disabled!");
+        let _ = tokio::task::spawn(async move {
+            let _ = tokio::signal::ctrl_c().await;
+            let _ = shutdown.send(true);
+        });
+    }
+
     server::run(
         backend,
         args.max_concurrent_requests,
Loading…
Reference in New Issue
Block a user