mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Enable flash attention by default
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
3f199134f0
commit
ae5bb789c2
@ -57,7 +57,7 @@ struct Args {
|
||||
use_mlock: bool,
|
||||
|
||||
/// Enable flash attention for faster inference. (EXPERIMENTAL)
|
||||
#[clap(default_value = "false", long, env)]
|
||||
#[clap(default_value = "true", long, env)]
|
||||
flash_attention: bool,
|
||||
|
||||
/// TODO
|
||||
|
Loading…
Reference in New Issue
Block a user