force attn to flashdecoding

Mohit Sharma 2025-04-11 15:24:12 +00:00
parent a7353c35e8
commit 2a10a28d08


@@ -158,7 +158,7 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
             prefix_caching = Some("0".to_string());
         }
         match config.model_type.as_deref() {
-            Some("falcon") | Some("deepseek_v2") => {
+            Some("falcon") | Some("deepseek_v2") | Some("llama4") => {
                 // Required because gemma2 needs bfloat16 which is not supported by
                 // flashinfer ?
                 if attention.is_none() {
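
For context, a minimal sketch of what this kind of match arm does: when the model type is one of the listed architectures and the user has not chosen an attention backend, the launcher forces the fallback backend ("flashdecoding", per the commit title). This is an illustrative standalone function, not the repository's actual code; the function name resolve_attention_sketch, the default "flashinfer", and the signature are assumptions made for the example.

// Minimal sketch, assuming a launcher-style resolver.
// Only the match arm mirrors the diff above; everything else is illustrative.
fn resolve_attention_sketch(model_type: Option<&str>, mut attention: Option<String>) -> String {
    match model_type {
        // Models assumed to need the fallback attention backend.
        Some("falcon") | Some("deepseek_v2") | Some("llama4") => {
            if attention.is_none() {
                // Force the fallback only when the user did not pick a backend.
                attention = Some("flashdecoding".to_string());
            }
        }
        _ => {}
    }
    // Otherwise keep the user's choice, or fall back to the assumed default.
    attention.unwrap_or_else(|| "flashinfer".to_string())
}

fn main() {
    // llama4 with no explicit choice is forced to flashdecoding.
    assert_eq!(resolve_attention_sketch(Some("llama4"), None), "flashdecoding");
    // An explicit user choice is left untouched.
    assert_eq!(
        resolve_attention_sketch(Some("llama4"), Some("paged".to_string())),
        "paged"
    );
}

With this change, llama4 is treated like falcon and deepseek_v2: unless the user explicitly sets an attention backend, it is switched away from the default.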