chore(backend): minor fixes mostly format

Author: Morgan Funtowicz
Date:   2024-11-05 23:48:13 +01:00
Parent: a1154b17ec
Commit: 7eec0f704f

4 changed files with 13 additions and 8 deletions


@@ -99,11 +99,11 @@ fn main() {
     println!("cargo:rustc-link-search=native={}", out_dir.display());
     if is_debug {
-        println!("cargo:rustc-link-lib=static=fmtd");
-        println!("cargo:rustc-link-lib=static=spdlogd");
+        println!("cargo:rustc-link-lib=dylib=fmtd");
+        println!("cargo:rustc-link-lib=dylib=spdlogd");
     } else {
-        println!("cargo:rustc-link-lib=fmt");
-        println!("cargo:rustc-link-lib=spdlog");
+        println!("cargo:rustc-link-lib=dylib=fmt");
+        println!("cargo:rustc-link-lib=dylib=spdlog");
     }
     println!("cargo:rustc-link-lib=static=common");


@@ -170,7 +170,7 @@ namespace huggingface::tgi::backends::llamacpp {
               mContext_(llama_context_factory(model)),
               mWorker_(mModel_, params.value_or(llama_context_default_params())) {
         llama_numa_init(ggml_numa_strategy::GGML_NUMA_STRATEGY_NUMACTL);
-    }
+    };

     std::expected<size_t, backend_error_t>
     single_worker_backend_t::stream(


@@ -157,10 +157,11 @@ namespace huggingface::tgi::backends::llamacpp {
     class single_worker_backend_t : backend_base_t {
     private:
-        constexpr const static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
+        constexpr static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
             auto llParams = llama_context_default_params();
             llParams.flash_attn = true;
+            llParams.n_batch = 1;
             llParams.n_threads = 1;
             llParams.no_perf = true;
             llParams.attention_type = llama_attention_type::LLAMA_ATTENTION_TYPE_CAUSAL;
@@ -173,6 +174,8 @@ namespace huggingface::tgi::backends::llamacpp {
     public:
         explicit single_worker_backend_t(llama_model *pModel, const std::optional<llama_context_params> &);
+        using backend_base_t::generate;
+
         std::expected<size_t, backend_error_t> stream(
                 std::span<const llama_token> tokens,
                 const generation_params_t &generation_params,
@@ -185,6 +188,8 @@ namespace huggingface::tgi::backends::llamacpp {
         llama_context_ptr mContext_;

     public:
+        using backend_base_t::generate;
+
         std::expected<size_t, backend_error_t> stream(
                 std::span<const llama_token> tokens,
                 const generation_params_t &generation_params,


@@ -70,7 +70,7 @@ pub enum LlamaCppBackendError {
 pub struct LlamaCppBackend {
     backlog: Sender<(GenerationContext, UnboundedSender<InferResult>)>,
-    scheduler_handle: JoinHandle<()>,
+    _scheduler_handle: JoinHandle<()>,
 }

 impl LlamaCppBackend {
@@ -101,7 +101,7 @@ impl LlamaCppBackend {
         let handle = unsafe { spawn(|| scheduler_loop(backend, tokenizer, receiver)) };
         Ok(Self {
             backlog: submitter,
-            scheduler_handle: handle,
+            _scheduler_handle: handle,
         })
     }
 }
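
Note on the `_scheduler_handle` rename: a leading underscore marks the field as intentionally unread, which silences rustc's dead_code lint while the struct still owns the `JoinHandle` (dropping the handle would merely detach the thread, not stop it). The sketch below is a standalone illustration of that pattern with hypothetical names, not the backend's actual code.

use std::thread::{spawn, JoinHandle};

struct Scheduler {
    // Never read after construction; the underscore prefix suppresses the
    // dead_code warning while keeping ownership of the handle.
    _worker: JoinHandle<()>,
}

impl Scheduler {
    fn start() -> Self {
        // The spawned thread runs independently; holding the handle only
        // preserves the option to join it later.
        let handle = spawn(|| {
            // scheduler loop would run here
        });
        Self { _worker: handle }
    }
}

fn main() {
    let _scheduler = Scheduler::start();
}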