Add simple resolution when the user specifies ATTENTION=paged: disable prefix caching unless a prefix-caching value has already been set.

This commit is contained in:
Nicolas Patry 2024-10-16 10:56:58 +02:00
parent 5c72f269b6
commit ff36b2fb39
No known key found for this signature in database
GPG Key ID: D2920555C90F704C

View File

@ -124,6 +124,10 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
}
}
}
if attention == Some("paged".to_string()) && prefix_caching.is_none() {
tracing::info!("Disabling prefix caching on paged attention");
prefix_caching = Some("0".to_string());
}
let attention = attention.unwrap_or("flashinfer".to_string());
let prefix_caching = prefix_caching.unwrap_or("true".to_string());