From 7eec0f704f05cbcc55e8b2a8132679b497d02fe3 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz
Date: Tue, 5 Nov 2024 23:48:13 +0100
Subject: [PATCH] chore(backend): minor fixes mostly format

---
 backends/llamacpp/build.rs         | 8 ++++----
 backends/llamacpp/csrc/backend.cpp | 2 +-
 backends/llamacpp/csrc/backend.hpp | 7 ++++++-
 backends/llamacpp/src/backend.rs   | 4 ++--
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/backends/llamacpp/build.rs b/backends/llamacpp/build.rs
index eefc6403..1ab926d4 100644
--- a/backends/llamacpp/build.rs
+++ b/backends/llamacpp/build.rs
@@ -99,11 +99,11 @@ fn main() {
     println!("cargo:rustc-link-search=native={}", out_dir.display());
 
     if is_debug {
-        println!("cargo:rustc-link-lib=static=fmtd");
-        println!("cargo:rustc-link-lib=static=spdlogd");
+        println!("cargo:rustc-link-lib=dylib=fmtd");
+        println!("cargo:rustc-link-lib=dylib=spdlogd");
     } else {
-        println!("cargo:rustc-link-lib=fmt");
-        println!("cargo:rustc-link-lib=spdlog");
+        println!("cargo:rustc-link-lib=dylib=fmt");
+        println!("cargo:rustc-link-lib=dylib=spdlog");
     }
 
     println!("cargo:rustc-link-lib=static=common");
diff --git a/backends/llamacpp/csrc/backend.cpp b/backends/llamacpp/csrc/backend.cpp
index f6956381..739b84a1 100644
--- a/backends/llamacpp/csrc/backend.cpp
+++ b/backends/llamacpp/csrc/backend.cpp
@@ -170,7 +170,7 @@ namespace huggingface::tgi::backends::llamacpp {
               mContext_(llama_context_factory(model)),
               mWorker_(mModel_, params.value_or(llama_context_default_params())) {
         llama_numa_init(ggml_numa_strategy::GGML_NUMA_STRATEGY_NUMACTL);
-    }
+    };
 
     std::expected
     single_worker_backend_t::stream(
diff --git a/backends/llamacpp/csrc/backend.hpp b/backends/llamacpp/csrc/backend.hpp
index bf9df5cc..4abc202d 100644
--- a/backends/llamacpp/csrc/backend.hpp
+++ b/backends/llamacpp/csrc/backend.hpp
@@ -157,10 +157,11 @@ namespace huggingface::tgi::backends::llamacpp {
 
     class single_worker_backend_t : backend_base_t {
     private:
-        constexpr const static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
+        constexpr static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
             auto llParams = llama_context_default_params();
             llParams.flash_attn = true;
             llParams.n_batch = 1;
+            llParams.n_threads = 1;
             llParams.no_perf = true;
             llParams.attention_type = llama_attention_type::LLAMA_ATTENTION_TYPE_CAUSAL;
 
@@ -173,6 +174,8 @@ namespace huggingface::tgi::backends::llamacpp {
     public:
         explicit single_worker_backend_t(llama_model *pModel, const std::optional &);
 
+        using backend_base_t::generate;
+
         std::expected stream(
                 std::span tokens,
                 const generation_params_t &generation_params,
@@ -185,6 +188,8 @@ namespace huggingface::tgi::backends::llamacpp {
         llama_context_ptr mContext_;
 
     public:
+        using backend_base_t::generate;
+
         std::expected stream(
                 std::span tokens,
                 const generation_params_t &generation_params,
diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs
index 62b4743d..609c8405 100644
--- a/backends/llamacpp/src/backend.rs
+++ b/backends/llamacpp/src/backend.rs
@@ -70,7 +70,7 @@ pub enum LlamaCppBackendError {
 
 pub struct LlamaCppBackend {
     backlog: Sender<(GenerationContext, UnboundedSender)>,
-    scheduler_handle: JoinHandle<()>,
+    _scheduler_handle: JoinHandle<()>,
 }
 
 impl LlamaCppBackend {
@@ -101,7 +101,7 @@ impl LlamaCppBackend {
         let handle = unsafe { spawn(|| scheduler_loop(backend, tokenizer, receiver)) };
         Ok(Self {
             backlog: submitter,
-            scheduler_handle: handle,
+            _scheduler_handle: handle,
         })
     }
 }