mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Enable KQV offload by default
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
809e288b5a
commit
3b1b049b32
@@ -60,7 +60,7 @@ struct Args {
|
||||
use_mlock: bool,
|
||||
|
||||
/// Enable offloading of KQV operations to the GPU.
|
||||
#[clap(default_value = "false", long, env)]
|
||||
#[clap(default_value = "true", long, env)]
|
||||
offload_kqv: bool,
|
||||
|
||||
/// Enable flash attention for faster inference. (EXPERIMENTAL)
|
||||
|
Loading…
Reference in New Issue
Block a user