From 2a10a28d08a0793e4bde36aa01a2a043e676dacf Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Fri, 11 Apr 2025 15:24:12 +0000
Subject: [PATCH] force attn to flashdecoding

---
 launcher/src/main.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index acff85730..2e22c1007 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -158,7 +158,7 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
             prefix_caching = Some("0".to_string());
         }
         match config.model_type.as_deref() {
-            Some("falcon") | Some("deepseek_v2") => {
+            Some("falcon") | Some("deepseek_v2") | Some("llama4") => {
                 // Required because gemma2 needs bfloat16 which is not supported by
                 // flashinfer ?
                 if attention.is_none() {