Enable KQV offload by default

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-02-06 18:33:30 +00:00
parent 809e288b5a
commit 3b1b049b32
No known key found for this signature in database

View File

@@ -60,7 +60,7 @@ struct Args {
use_mlock: bool,
/// Enable offloading of KQV operations to the GPU.
-#[clap(default_value = "false", long, env)]
+#[clap(default_value = "true", long, env)]
offload_kqv: bool,
/// Enable flash attention for faster inference. (EXPERIMENTAL)