diff --git a/launcher/src/main.rs b/launcher/src/main.rs index d2f1c0e3..0d7af66d 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -124,6 +124,10 @@ fn resolve_attention(config: &Option, lora_adapters: &Option) -> } } } + if attention == Some("paged".to_string()) && prefix_caching.is_none() { + tracing::info!("Disabling prefix caching on paged attention"); + prefix_caching = Some("0".to_string()); + } let attention = attention.unwrap_or("flashinfer".to_string()); let prefix_caching = prefix_caching.unwrap_or("true".to_string());