From ae5bb789c29410a8e6376349eeca88fcd1c60615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Fri, 31 Jan 2025 16:07:10 +0000 Subject: [PATCH] Enable flash attention by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Adrien Gallouët --- backends/llamacpp/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/llamacpp/src/main.rs b/backends/llamacpp/src/main.rs index 4afa64e9..1c7c5e4c 100644 --- a/backends/llamacpp/src/main.rs +++ b/backends/llamacpp/src/main.rs @@ -57,7 +57,7 @@ struct Args { use_mlock: bool, /// Enable flash attention for faster inference. (EXPERIMENTAL) - #[clap(default_value = "false", long, env)] + #[clap(default_value = "true", long, env)] flash_attention: bool, /// TODO