Enable flash attention by default

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-01-31 16:07:10 +00:00
parent 3f199134f0
commit ae5bb789c2
No known key found for this signature in database

View File

@@ -57,7 +57,7 @@ struct Args {
     use_mlock: bool,
     /// Enable flash attention for faster inference. (EXPERIMENTAL)
-    #[clap(default_value = "false", long, env)]
+    #[clap(default_value = "true", long, env)]
     flash_attention: bool,
     /// TODO