From ce7e35656127cbd0c5b4ddff074365a6201a206a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 15 Oct 2024 13:49:32 +0000 Subject: [PATCH] Use flashinfer for Gemma 2. --- launcher/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 42962dffb..ee259e434 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -94,7 +94,7 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) -> prefix_caching = Some("0".to_string()); } match config.model_type.as_deref() { - Some("gemma2") | Some("falcon") | Some("deepseek_v2") => { + Some("falcon") | Some("deepseek_v2") => { // Required because gemma2 needs bfloat16 which is not supported by // flashinfer ? if attention.is_none() {