diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 42962dffb..ee259e434 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -94,7 +94,7 @@ fn resolve_attention(config: &Option, lora_adapters: &Option) -> prefix_caching = Some("0".to_string()); } match config.model_type.as_deref() { - Some("gemma2") | Some("falcon") | Some("deepseek_v2") => { + Some("falcon") | Some("deepseek_v2") => { // Required because gemma2 needs bfloat16 which is not supported by // flashinfer ? if attention.is_none() {