From 3b1b049b321290f641db2be409f06467faff34e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Thu, 6 Feb 2025 18:33:30 +0000 Subject: [PATCH] Enable KQV offload by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Adrien Gallouët --- backends/llamacpp/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/llamacpp/src/main.rs b/backends/llamacpp/src/main.rs index 753138f9..e8aa579f 100644 --- a/backends/llamacpp/src/main.rs +++ b/backends/llamacpp/src/main.rs @@ -60,7 +60,7 @@ struct Args { use_mlock: bool, /// Enable offloading of KQV operations to the GPU. - #[clap(default_value = "false", long, env)] + #[clap(default_value = "true", long, env)] offload_kqv: bool, /// Enable flash attention for faster inference. (EXPERIMENTAL)