Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19)
Squashed commit, each change signed off by Adrien Gallouët <angt@huggingface.co>:

* Build faster
* Make --model-gguf optional
* Bump llama.cpp
* Enable mmap, offload_kqv & flash_attention by default
* Update doc
* Better error message
* Update doc
* Update installed packages
* Save gguf in models/MODEL_ID/model.gguf
* Fix build with Mach-O
* Quantize without llama-quantize
* Bump llama.cpp and switch to ggml-org
* Remove make-gguf.sh
* Update Cargo.lock
* Support HF_HUB_USER_AGENT_ORIGIN
* Bump llama.cpp
* Add --build-arg llamacpp_native & llamacpp_cpu_arm_arch
50 lines · 1.6 KiB · Rust
use bindgen::callbacks::{ItemInfo, ParseCallbacks};
use std::env;
use std::path::PathBuf;

/// Strips the `llama_` prefix from every generated item, so a C
/// function such as `llama_backend_init` is exposed to Rust as
/// `backend_init`. Names without the prefix are left unchanged.
#[derive(Debug)]
struct PrefixStripper;

impl ParseCallbacks for PrefixStripper {
    fn generated_name_override(&self, item_info: ItemInfo<'_>) -> Option<String> {
        item_info.name.strip_prefix("llama_").map(str::to_string)
    }
}
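
// Illustration (not part of the build script): the override applies to
// every generated item, e.g.
//   llama_decode    -> decode
//   llama_token_eos -> token_eos
//   ggml_init       -> unchanged (no prefix match, the callback
//                      returns None and bindgen keeps the name)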

fn main() {
    // CUDA_VERSION is captured at compile time via option_env!. A full
    // "major.minor.patch" value is reduced to "major.minor" before the
    // matching CUDA toolkit is located through pkg-config.
    if let Some(cuda_version) = option_env!("CUDA_VERSION") {
        let mut version: Vec<&str> = cuda_version.split('.').collect();
        if version.len() > 2 {
            version.pop();
        }
        let cuda_version = format!("cuda-{}", version.join("."));
        pkg_config::Config::new().probe(&cuda_version).unwrap();
    }
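    // Illustration (assumed value): CUDA_VERSION=12.4.1 becomes "12.4",
    // so pkg-config probes the "cuda-12.4" module and injects the
    // toolkit's include and link flags into the build.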

    // Locate llama.cpp itself through pkg-config.
    let llama = pkg_config::Config::new().probe("llama").unwrap();

    // Embed every link path as an rpath so libllama is found at run
    // time without LD_LIBRARY_PATH.
    for path in &llama.link_paths {
        println!("cargo:rustc-link-arg=-Wl,-rpath,{}", path.display());
    }
    if cfg!(target_os = "linux") {
        // Emit DT_RPATH instead of DT_RUNPATH so the embedded paths
        // also apply to transitive dependencies.
        println!("cargo:rustc-link-arg=-Wl,--disable-new-dtags");
    }
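    // Illustration (assumed path): if pkg-config reports /usr/local/lib
    // as a link path, the directives printed above are
    //   cargo:rustc-link-arg=-Wl,-rpath,/usr/local/lib
    //   cargo:rustc-link-arg=-Wl,--disable-new-dtags   (Linux only)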

    // Generate Rust bindings for llama.h, found on the pkg-config
    // include paths. prepend_enum_name(false) keeps enum variants free
    // of the enum-name prefix; both callbacks run: PrefixStripper
    // renames items, CargoCallbacks emits rerun-if-changed directives.
    let bindings = bindgen::Builder::default()
        .clang_args(
            llama
                .include_paths
                .iter()
                .map(|p| format!("-I{}", p.display())),
        )
        .header_contents("llama_bindings.h", "#include <llama.h>")
        .prepend_enum_name(false)
        .parse_callbacks(Box::new(PrefixStripper))
        .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
        .generate()
        .expect("Unable to generate bindings");

    // Write the bindings to $OUT_DIR/llamacpp.rs for the crate to
    // include!().
    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
    bindings
        .write_to_file(out_path.join("llamacpp.rs"))
        .expect("Couldn't write bindings!");
}
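
A minimal sketch of how a crate typically consumes the generated file. The module name `llamacpp` matches the file name written above, but the wrapper shape and the calls are standard bindgen practice assumed here, not taken from this repository:

// Consumer-side sketch (assumed): pull the generated bindings into a
// module and call renamed symbols through the C FFI.
#[allow(non_upper_case_globals, non_camel_case_types, non_snake_case)]
mod llamacpp {
    include!(concat!(env!("OUT_DIR"), "/llamacpp.rs"));
}

fn main() {
    // With PrefixStripper, the C functions llama_backend_init() and
    // llama_backend_free() are exposed without their llama_ prefix.
    unsafe {
        llamacpp::backend_init();
        llamacpp::backend_free();
    }
}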