From 390f0ec06159aacdcfb88c8437e0ce9db98685fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Fri, 31 Jan 2025 15:00:23 +0000 Subject: [PATCH] Cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Adrien Gallouët --- backends/llamacpp/src/backend.rs | 2 -- backends/llamacpp/src/main.rs | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs index ba5ca186..bf45a67f 100644 --- a/backends/llamacpp/src/backend.rs +++ b/backends/llamacpp/src/backend.rs @@ -151,8 +151,6 @@ impl Llamacpp { LlamacppSplitMode::GPU(n) => n as _, _ => 0, }; - info!(?params.split_mode); - info!(?params.main_gpu); params.use_mmap = conf.use_mmap; params.use_mlock = conf.use_mlock; bindings::llama_model_load_from_file(gguf.as_ptr(), params) diff --git a/backends/llamacpp/src/main.rs b/backends/llamacpp/src/main.rs index e1edd72d..5fb23d17 100644 --- a/backends/llamacpp/src/main.rs +++ b/backends/llamacpp/src/main.rs @@ -37,19 +37,19 @@ struct Args { n_gpu_layers: usize, /// Split the model across multiple GPUs. - #[clap(default_value = "Layer", value_enum, long, env)] + #[clap(default_value = "Layer", long, env)] split_mode: LlamacppSplitMode, /// Defragment the KV cache if holes/size > threshold. #[clap(default_value = "-1.0", long, env)] defrag_threshold: f32, - #[clap(default_value = "true", long, env)] /// Whether to use memory mapping. + #[clap(default_value = "true", long, env)] use_mmap: bool, - #[clap(default_value = "false", long, env)] /// Whether to use memory locking. + #[clap(default_value = "false", long, env)] use_mlock: bool, /// Enable flash attention for faster inference. (EXPERIMENTAL)