From 7eec0f704f05cbcc55e8b2a8132679b497d02fe3 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz
Date: Tue, 5 Nov 2024 23:48:13 +0100
Subject: [PATCH] chore(backend): minor fixes mostly format

---
 backends/llamacpp/build.rs         | 8 ++++----
 backends/llamacpp/csrc/backend.cpp | 2 +-
 backends/llamacpp/csrc/backend.hpp | 7 ++++++-
 backends/llamacpp/src/backend.rs   | 4 ++--
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/backends/llamacpp/build.rs b/backends/llamacpp/build.rs
index eefc6403..1ab926d4 100644
--- a/backends/llamacpp/build.rs
+++ b/backends/llamacpp/build.rs
@@ -99,11 +99,11 @@ fn main() {
     println!("cargo:rustc-link-search=native={}", out_dir.display());
 
     if is_debug {
-        println!("cargo:rustc-link-lib=static=fmtd");
-        println!("cargo:rustc-link-lib=static=spdlogd");
+        println!("cargo:rustc-link-lib=dylib=fmtd");
+        println!("cargo:rustc-link-lib=dylib=spdlogd");
     } else {
-        println!("cargo:rustc-link-lib=fmt");
-        println!("cargo:rustc-link-lib=spdlog");
+        println!("cargo:rustc-link-lib=dylib=fmt");
+        println!("cargo:rustc-link-lib=dylib=spdlog");
     }
 
     println!("cargo:rustc-link-lib=static=common");
diff --git a/backends/llamacpp/csrc/backend.cpp b/backends/llamacpp/csrc/backend.cpp
index f6956381..739b84a1 100644
--- a/backends/llamacpp/csrc/backend.cpp
+++ b/backends/llamacpp/csrc/backend.cpp
@@ -170,7 +170,7 @@ namespace huggingface::tgi::backends::llamacpp {
               mContext_(llama_context_factory(model)),
               mWorker_(mModel_, params.value_or(llama_context_default_params())) {
         llama_numa_init(ggml_numa_strategy::GGML_NUMA_STRATEGY_NUMACTL);
-    }
+    };
 
     std::expected
     single_worker_backend_t::stream(
diff --git a/backends/llamacpp/csrc/backend.hpp b/backends/llamacpp/csrc/backend.hpp
index bf9df5cc..4abc202d 100644
--- a/backends/llamacpp/csrc/backend.hpp
+++ b/backends/llamacpp/csrc/backend.hpp
@@ -157,10 +157,11 @@ namespace huggingface::tgi::backends::llamacpp {
 
     class single_worker_backend_t : backend_base_t {
     private:
-        constexpr const static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
+        constexpr static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
             auto llParams = llama_context_default_params();
             llParams.flash_attn = true;
             llParams.n_batch = 1;
+            llParams.n_threads = 1;
             llParams.no_perf = true;
             llParams.attention_type = llama_attention_type::LLAMA_ATTENTION_TYPE_CAUSAL;
 
@@ -173,6 +174,8 @@ namespace huggingface::tgi::backends::llamacpp {
     public:
         explicit single_worker_backend_t(llama_model *pModel, const std::optional &);
 
+        using backend_base_t::generate;
+
         std::expected stream(
                 std::span tokens,
                 const generation_params_t &generation_params,
@@ -185,6 +188,8 @@ namespace huggingface::tgi::backends::llamacpp {
         llama_context_ptr mContext_;
 
     public:
+        using backend_base_t::generate;
+
         std::expected stream(
                 std::span tokens,
                 const generation_params_t &generation_params,
diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs
index 62b4743d..609c8405 100644
--- a/backends/llamacpp/src/backend.rs
+++ b/backends/llamacpp/src/backend.rs
@@ -70,7 +70,7 @@ pub enum LlamaCppBackendError {
 
 pub struct LlamaCppBackend {
     backlog: Sender<(GenerationContext, UnboundedSender)>,
-    scheduler_handle: JoinHandle<()>,
+    _scheduler_handle: JoinHandle<()>,
 }
 
 impl LlamaCppBackend {
@@ -101,7 +101,7 @@ impl LlamaCppBackend {
         let handle = unsafe { spawn(|| scheduler_loop(backend, tokenizer, receiver)) };
         Ok(Self {
             backlog: submitter,
-            scheduler_handle: handle,
+            _scheduler_handle: handle,
         })
     }
 }