diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 1f47475b..4eeea02d 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -1500,6 +1500,10 @@ fn main() -> Result<(), LauncherError> { match config.head_dim { Some(h) if h == 64 || h == 128 || h == 256 => { // std::env::set_var("ATTENTION", "flashdecoding"); + if args.lora_adapters.is_some() { + tracing::info!("Disabling prefix caching because of lora adapters"); + std::env::set_var("USE_PREFIX_CACHING", "0"); + } } _ => { tracing::info!("Forcing flash decoding because head dim is not supported by flashinfer, also disabling prefix caching");