mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 16:32:12 +00:00

chore(backend): minor fixes mostly format

parent a1154b17ec
commit 7eec0f704f
@@ -99,11 +99,11 @@ fn main() {
     println!("cargo:rustc-link-search=native={}", out_dir.display());

     if is_debug {
-        println!("cargo:rustc-link-lib=static=fmtd");
-        println!("cargo:rustc-link-lib=static=spdlogd");
+        println!("cargo:rustc-link-lib=dylib=fmtd");
+        println!("cargo:rustc-link-lib=dylib=spdlogd");
     } else {
-        println!("cargo:rustc-link-lib=fmt");
-        println!("cargo:rustc-link-lib=spdlog");
+        println!("cargo:rustc-link-lib=dylib=fmt");
+        println!("cargo:rustc-link-lib=dylib=spdlog");
     }

     println!("cargo:rustc-link-lib=static=common");
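For context on the hunk above: a Cargo build script emits `cargo:rustc-link-lib=<kind>=<name>` directives, where `static` asks the linker for the `.a` archive and `dylib` for the shared library; the change switches fmt/spdlog to dynamic linking in both profiles (debug builds keep the conventional `d` suffix). Below is a minimal, self-contained sketch of that pattern only; the `PROFILE` check and the search path are illustrative assumptions, not the backend's actual build script.

use std::env;
use std::path::PathBuf;

fn main() {
    // Cargo sets PROFILE to "debug" or "release" when running build scripts.
    let is_debug = env::var("PROFILE").map(|p| p == "debug").unwrap_or(false);

    // Hypothetical directory where the native fmt/spdlog libraries were built.
    let lib_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR is set by Cargo")).join("lib");
    println!("cargo:rustc-link-search=native={}", lib_dir.display());

    if is_debug {
        // Debug builds of fmt/spdlog carry a 'd' suffix; link their shared objects.
        println!("cargo:rustc-link-lib=dylib=fmtd");
        println!("cargo:rustc-link-lib=dylib=spdlogd");
    } else {
        println!("cargo:rustc-link-lib=dylib=fmt");
        println!("cargo:rustc-link-lib=dylib=spdlog");
    }
}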
@@ -170,7 +170,7 @@ namespace huggingface::tgi::backends::llamacpp {
               mContext_(llama_context_factory(model)),
               mWorker_(mModel_, params.value_or(llama_context_default_params())) {
         llama_numa_init(ggml_numa_strategy::GGML_NUMA_STRATEGY_NUMACTL);
     }
 };

 std::expected<size_t, backend_error_t>
 single_worker_backend_t::stream(
@@ -157,10 +157,11 @@ namespace huggingface::tgi::backends::llamacpp {

     class single_worker_backend_t : backend_base_t {
     private:
-        constexpr const static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
+        constexpr static auto llama_context_factory = [](llama_model *pModel) -> llama_context_ptr {
             auto llParams = llama_context_default_params();
             llParams.flash_attn = true;
             llParams.n_batch = 1;
             llParams.n_threads = 1;
             llParams.no_perf = true;
             llParams.attention_type = llama_attention_type::LLAMA_ATTENTION_TYPE_CAUSAL;
@@ -173,6 +174,8 @@ namespace huggingface::tgi::backends::llamacpp {
     public:
         explicit single_worker_backend_t(llama_model *pModel, const std::optional<llama_context_params> &);

+        using backend_base_t::generate;
+
         std::expected<size_t, backend_error_t> stream(
                 std::span<const llama_token> tokens,
                 const generation_params_t &generation_params,
@@ -185,6 +188,8 @@ namespace huggingface::tgi::backends::llamacpp {
         llama_context_ptr mContext_;

     public:
+        using backend_base_t::generate;
+
         std::expected<size_t, backend_error_t> stream(
                 std::span<const llama_token> tokens,
                 const generation_params_t &generation_params,
@@ -70,7 +70,7 @@ pub enum LlamaCppBackendError {

 pub struct LlamaCppBackend {
     backlog: Sender<(GenerationContext, UnboundedSender<InferResult>)>,
-    scheduler_handle: JoinHandle<()>,
+    _scheduler_handle: JoinHandle<()>,
 }

 impl LlamaCppBackend {
@@ -101,7 +101,7 @@ impl LlamaCppBackend {
         let handle = unsafe { spawn(|| scheduler_loop(backend, tokenizer, receiver)) };
         Ok(Self {
             backlog: submitter,
-            scheduler_handle: handle,
+            _scheduler_handle: handle,
         })
     }
 }