Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-01-31 15:00:23 +00:00
parent d6ded897a8
commit 390f0ec061
No known key found for this signature in database
2 changed files with 3 additions and 5 deletions

View File

@@ -151,8 +151,6 @@ impl Llamacpp {
LlamacppSplitMode::GPU(n) => n as _,
_ => 0,
};
info!(?params.split_mode);
info!(?params.main_gpu);
params.use_mmap = conf.use_mmap;
params.use_mlock = conf.use_mlock;
bindings::llama_model_load_from_file(gguf.as_ptr(), params)

View File

@@ -37,19 +37,19 @@ struct Args {
n_gpu_layers: usize,
/// Split the model across multiple GPUs.
#[clap(default_value = "Layer", value_enum, long, env)]
#[clap(default_value = "Layer", long, env)]
split_mode: LlamacppSplitMode,
/// Defragment the KV cache if holes/size > threshold.
#[clap(default_value = "-1.0", long, env)]
defrag_threshold: f32,
#[clap(default_value = "true", long, env)]
/// Whether to use memory mapping.
#[clap(default_value = "true", long, env)]
use_mmap: bool,
#[clap(default_value = "false", long, env)]
/// Whether to use memory locking.
#[clap(default_value = "false", long, env)]
use_mlock: bool,
/// Enable flash attention for faster inference. (EXPERIMENTAL)