Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-01-31 15:00:23 +00:00
parent d6ded897a8
commit 390f0ec061
No known key found for this signature in database
2 changed files with 3 additions and 5 deletions

View File

@@ -151,8 +151,6 @@ impl Llamacpp {
LlamacppSplitMode::GPU(n) => n as _, LlamacppSplitMode::GPU(n) => n as _,
_ => 0, _ => 0,
}; };
info!(?params.split_mode);
info!(?params.main_gpu);
params.use_mmap = conf.use_mmap; params.use_mmap = conf.use_mmap;
params.use_mlock = conf.use_mlock; params.use_mlock = conf.use_mlock;
bindings::llama_model_load_from_file(gguf.as_ptr(), params) bindings::llama_model_load_from_file(gguf.as_ptr(), params)

View File

@@ -37,19 +37,19 @@ struct Args {
n_gpu_layers: usize, n_gpu_layers: usize,
/// Split the model across multiple GPUs. /// Split the model across multiple GPUs.
#[clap(default_value = "Layer", value_enum, long, env)] #[clap(default_value = "Layer", long, env)]
split_mode: LlamacppSplitMode, split_mode: LlamacppSplitMode,
/// Defragment the KV cache if holes/size > threshold. /// Defragment the KV cache if holes/size > threshold.
#[clap(default_value = "-1.0", long, env)] #[clap(default_value = "-1.0", long, env)]
defrag_threshold: f32, defrag_threshold: f32,
#[clap(default_value = "true", long, env)]
/// Whether to use memory mapping. /// Whether to use memory mapping.
#[clap(default_value = "true", long, env)]
use_mmap: bool, use_mmap: bool,
#[clap(default_value = "false", long, env)]
/// Whether to use memory locking. /// Whether to use memory locking.
#[clap(default_value = "false", long, env)]
use_mlock: bool, use_mlock: bool,
/// Enable flash attention for faster inference. (EXPERIMENTAL) /// Enable flash attention for faster inference. (EXPERIMENTAL)