diff --git a/launcher/src/main.rs b/launcher/src/main.rs index acff85730..2e22c1007 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -158,7 +158,7 @@ fn resolve_attention(config: &Option, lora_adapters: &Option) -> prefix_caching = Some("0".to_string()); } match config.model_type.as_deref() { - Some("falcon") | Some("deepseek_v2") => { + Some("falcon") | Some("deepseek_v2") | Some("llama4") => { // Required because gemma2 needs bfloat16 which is not supported by // flashinfer ? if attention.is_none() {