Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-19 15:52:08 +00:00)
Cleanup
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
commit 390f0ec061 (parent d6ded897a8)
@@ -151,8 +151,6 @@ impl Llamacpp {
             LlamacppSplitMode::GPU(n) => n as _,
             _ => 0,
         };
-        info!(?params.split_mode);
-        info!(?params.main_gpu);
         params.use_mmap = conf.use_mmap;
         params.use_mlock = conf.use_mlock;
         bindings::llama_model_load_from_file(gguf.as_ptr(), params)
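For orientation, here is a minimal, self-contained sketch of the parameter plumbing this hunk touches. The ModelParams struct stands in for the generated llama.cpp bindings, and every name not visible in the diff (the Row variant, Conf, model_params) is an assumption for illustration, not the TGI source.

// Sketch only: GPU(n) pins the whole model to device n; other split modes
// keep device 0 as the main GPU, matching the `_ => 0` arm in the hunk above.
#[derive(Clone, Copy, Debug)]
#[allow(dead_code)]
enum LlamacppSplitMode {
    GPU(usize),
    Layer,
    Row,
}

#[derive(Debug, Default)]
struct ModelParams {
    main_gpu: i32,
    use_mmap: bool,
    use_mlock: bool,
}

struct Conf {
    split_mode: LlamacppSplitMode,
    use_mmap: bool,
    use_mlock: bool,
}

fn model_params(conf: &Conf) -> ModelParams {
    ModelParams {
        main_gpu: match conf.split_mode {
            LlamacppSplitMode::GPU(n) => n as _,
            _ => 0,
        },
        use_mmap: conf.use_mmap,   // map the GGUF file into memory
        use_mlock: conf.use_mlock, // optionally lock the weights in RAM
    }
}

fn main() {
    let conf = Conf {
        split_mode: LlamacppSplitMode::GPU(1),
        use_mmap: true,
        use_mlock: false,
    };
    println!("{:?}", model_params(&conf));
}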
@@ -37,19 +37,19 @@ struct Args {
     n_gpu_layers: usize,
 
     /// Split the model across multiple GPUs.
-    #[clap(default_value = "Layer", value_enum, long, env)]
+    #[clap(default_value = "Layer", long, env)]
     split_mode: LlamacppSplitMode,
 
     /// Defragment the KV cache if holes/size > threshold.
     #[clap(default_value = "-1.0", long, env)]
     defrag_threshold: f32,
 
-    #[clap(default_value = "true", long, env)]
     /// Whether to use memory mapping.
+    #[clap(default_value = "true", long, env)]
     use_mmap: bool,
 
-    #[clap(default_value = "false", long, env)]
     /// Whether to use memory locking.
+    #[clap(default_value = "false", long, env)]
     use_mlock: bool,
 
     /// Enable flash attention for faster inference. (EXPERIMENTAL)
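Dropping value_enum from the split_mode attribute suggests LlamacppSplitMode parses its value itself, which it needs to do anyway to accept a bare GPU index next to the named modes. A hedged sketch of such a parser follows (assumed, not quoted from the repository; the enum is redeclared from the sketch above so the snippet compiles on its own).

use std::str::FromStr;

// Redeclared from the earlier sketch so this snippet stands alone.
#[derive(Clone, Copy, Debug, PartialEq)]
enum LlamacppSplitMode {
    GPU(usize),
    Layer,
    Row,
}

impl FromStr for LlamacppSplitMode {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_lowercase().as_str() {
            "layer" => Ok(Self::Layer),
            "row" => Ok(Self::Row),
            // Anything else is treated as a bare GPU index, e.g. "--split-mode 2".
            _ => s
                .parse::<usize>()
                .map(Self::GPU)
                .map_err(|_| format!("expected 'Layer', 'Row' or a GPU index, got '{s}'")),
        }
    }
}

fn main() {
    assert_eq!("Layer".parse::<LlamacppSplitMode>(), Ok(LlamacppSplitMode::Layer));
    assert_eq!("3".parse::<LlamacppSplitMode>(), Ok(LlamacppSplitMode::GPU(3)));
    assert!("gpu".parse::<LlamacppSplitMode>().is_err());
    println!("split-mode parsing sketch works");
}

Under that assumption, --split-mode Layer, --split-mode Row, and --split-mode 0 would all parse through clap's FromStr fallback, so the value_enum hint is no longer needed.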