From ff36b2fb390ac9f0b4fe0e175601c02a1045993c Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 16 Oct 2024 10:56:58 +0200
Subject: [PATCH] Add simple resolution when user specifies ATTENTION=paged.

---
 launcher/src/main.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index d2f1c0e3..0d7af66d 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -124,6 +124,10 @@ fn resolve_attention(config: &Option, lora_adapters: &Option) ->
             }
         }
     }
+    if attention == Some("paged".to_string()) && prefix_caching.is_none() {
+        tracing::info!("Disabling prefix caching on paged attention");
+        prefix_caching = Some("0".to_string());
+    }
 
     let attention = attention.unwrap_or("flashinfer".to_string());
     let prefix_caching = prefix_caching.unwrap_or("true".to_string());