mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 16:32:12 +00:00

chore(backend): minor fixes mostly format

parent a1154b17ec
commit 7eec0f704f
@@ -99,11 +99,11 @@ fn main() {
     println!("cargo:rustc-link-search=native={}", out_dir.display());

     if is_debug {
-        println!("cargo:rustc-link-lib=static=fmtd");
-        println!("cargo:rustc-link-lib=static=spdlogd");
+        println!("cargo:rustc-link-lib=dylib=fmtd");
+        println!("cargo:rustc-link-lib=dylib=spdlogd");
     } else {
-        println!("cargo:rustc-link-lib=fmt");
-        println!("cargo:rustc-link-lib=spdlog");
+        println!("cargo:rustc-link-lib=dylib=fmt");
+        println!("cargo:rustc-link-lib=dylib=spdlog");
     }

     println!("cargo:rustc-link-lib=static=common");
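For context on the hunk above: a Cargo build script emits `cargo:rustc-link-lib=<kind>=<name>` directives, where `static` asks the linker for the `.a` archive and `dylib` for the shared library; the change switches fmt/spdlog to dynamic linking in both profiles (debug builds keep the conventional `d` suffix). Below is a minimal, self-contained sketch of that pattern only; the `PROFILE` check and the search path are illustrative assumptions, not the backend's actual build script.

use std::env;
use std::path::PathBuf;

fn main() {
    // Cargo sets PROFILE to "debug" or "release" when running build scripts.
    let is_debug = env::var("PROFILE").map(|p| p == "debug").unwrap_or(false);

    // Hypothetical directory where the native fmt/spdlog libraries were built.
    let lib_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR is set by Cargo")).join("lib");
    println!("cargo:rustc-link-search=native={}", lib_dir.display());

    if is_debug {
        // Debug builds of fmt/spdlog carry a 'd' suffix; link their shared objects.
        println!("cargo:rustc-link-lib=dylib=fmtd");
        println!("cargo:rustc-link-lib=dylib=spdlogd");
    } else {
        println!("cargo:rustc-link-lib=dylib=fmt");
        println!("cargo:rustc-link-lib=dylib=spdlog");
    }
}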
@@ -170,7 +170,7 @@ namespace huggingface::tgi::backends::llamacpp {
               mContext_(llama_context_factory(model)),
               mWorker_(mModel_, params.value_or(llama_context_default_params())) {
         llama_numa_init(ggml_numa_strategy::GGML_NUMA_STRATEGY_NUMACTL);
     }
 };

 std::expected<size_t, backend_error_t>
 single_worker_backend_t::stream(
@@ -157,10 +157,11 @@ namespace huggingface::tgi::backends::llamacpp {

     class single_worker_backend_t : backend_base_t {
     private:
-        constexpr const static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
+        constexpr static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
             auto llParams = llama_context_default_params();
             llParams.flash_attn = true;
             llParams.n_batch = 1;
             llParams.n_threads = 1;
             llParams.no_perf = true;
             llParams.attention_type = llama_attention_type::LLAMA_ATTENTION_TYPE_CAUSAL;
@@ -173,6 +174,8 @@ namespace huggingface::tgi::backends::llamacpp {
     public:
         explicit single_worker_backend_t(llama_model *pModel, const std::optional<llama_context_params> &);

+        using backend_base_t::generate;
+
         std::expected<size_t, backend_error_t> stream(
                 std::span<const llama_token> tokens,
                 const generation_params_t &generation_params,
@@ -185,6 +188,8 @@ namespace huggingface::tgi::backends::llamacpp {
         llama_context_ptr mContext_;

     public:
+        using backend_base_t::generate;
+
         std::expected<size_t, backend_error_t> stream(
                 std::span<const llama_token> tokens,
                 const generation_params_t &generation_params,
@@ -70,7 +70,7 @@ pub enum LlamaCppBackendError {

 pub struct LlamaCppBackend {
     backlog: Sender<(GenerationContext, UnboundedSender<InferResult>)>,
-    scheduler_handle: JoinHandle<()>,
+    _scheduler_handle: JoinHandle<()>,
 }

 impl LlamaCppBackend {
@@ -101,7 +101,7 @@ impl LlamaCppBackend {
         let handle = unsafe { spawn(|| scheduler_loop(backend, tokenizer, receiver)) };
         Ok(Self {
             backlog: submitter,
-            scheduler_handle: handle,
+            _scheduler_handle: handle,
         })
     }
 }