mirror of
https://github.com/huggingface/text-generation-inference.git
Fix bool args
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
parent 1401418243
commit b77d05d3af
@@ -52,19 +52,19 @@ struct Args {
     numa: LlamacppNuma,

     /// Use memory mapping for the model.
-    #[clap(default_value = "true", long, env)]
+    #[clap(long, env)]
     use_mmap: bool,

     /// Use memory locking to prevent swapping.
-    #[clap(default_value = "false", long, env)]
+    #[clap(long, env)]
     use_mlock: bool,

     /// Enable offloading of KQV operations to the GPU.
-    #[clap(default_value = "true", long, env)]
+    #[clap(long, env)]
     offload_kqv: bool,

     /// Enable flash attention for faster inference. (EXPERIMENTAL)
-    #[clap(default_value = "true", long, env)]
+    #[clap(long, env)]
     flash_attention: bool,

     /// Data type used for K cache.
@@ -132,7 +132,7 @@ struct Args {
     tokenizer_config_path: Option<String>,

     /// Disable grammar support.
-    #[clap(long, env, default_value_t = false)]
+    #[clap(long, env)]
     disable_grammar_support: bool,

     /// Maximum number of inputs per request.
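For context, a minimal sketch (not part of the commit) of why removing the defaults matters: with clap's derive API a plain `bool` field becomes an on/off flag (implicitly `ArgAction::SetTrue`), so pairing it with `default_value = "true"` leaves the value `true` whether or not the flag is passed on the command line. Dropping the `default_value`, as this commit does, restores the expected flag behavior: absent means `false`, present means `true`. The snippet below assumes clap v4 with the `derive` and `env` cargo features enabled; the `Demo` struct name is illustrative only.

// Hedged sketch, not from the repository: shows clap's default handling
// of a `bool` field once no `default_value` is attached to it.
use clap::Parser;

#[derive(Parser, Debug)]
struct Demo {
    /// On/off flag: `false` when absent, `true` when `--use-mmap` is passed
    /// (or when the corresponding environment variable is set).
    #[clap(long, env)]
    use_mmap: bool,
}

fn main() {
    let args = Demo::parse();
    println!("use_mmap = {}", args.use_mmap);
}

Running the sketch, `./demo` prints `use_mmap = false`, while `./demo --use-mmap` (or setting `USE_MMAP` in the environment) prints `use_mmap = true`.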