diff --git a/Dockerfile_llamacpp b/Dockerfile_llamacpp
index 6fba85e9..7404ed4b 100644
--- a/Dockerfile_llamacpp
+++ b/Dockerfile_llamacpp
@@ -3,7 +3,6 @@ FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04 AS deps
 ARG llamacpp_version=b4651
 ARG llamacpp_cuda=OFF
 ARG cuda_arch=75-real;80-real;86-real;89-real;90-real
-ENV TGI_LLAMA_PKG_CUDA=cuda-${CUDA_VERSION%.*}
 
 WORKDIR /opt/src
 
diff --git a/backends/llamacpp/build.rs b/backends/llamacpp/build.rs
index aa2a0d87..b554694b 100644
--- a/backends/llamacpp/build.rs
+++ b/backends/llamacpp/build.rs
@@ -31,10 +31,21 @@ impl ParseCallbacks for PrefixStripper {
 }
 
 fn main() {
-    let pkg_cuda = option_env!("TGI_LLAMA_PKG_CUDA");
     let lib_search_path = option_env!("TGI_LLAMA_LD_LIBRARY_PATH");
     let lib_target_hardware = option_env!("TGI_LLAMA_HARDWARE_TARGET").unwrap_or("cpu");
 
+    if let Some(cuda_version) = option_env!("CUDA_VERSION") {
+        // Keep only `<major>.<minor>`: drop the trailing patch component (12.8.0 -> 12.8).
+        let mut version: Vec<&str> = cuda_version.split('.').collect();
+        if version.len() > 2 {
+            version.pop();
+        }
+        // Probe `cuda-<major>.<minor>`, the name TGI_LLAMA_PKG_CUDA used to carry.
+        let pkg_cuda = format!("cuda-{}", version.join("."));
+        pkg_config::Config::new().probe(&pkg_cuda).unwrap();
+    }
+    pkg_config::Config::new().probe("llama").unwrap();
+
     let bindings = bindgen::Builder::default()
         .header("src/wrapper.h")
         .prepend_enum_name(false)
@@ -48,10 +59,5 @@ fn main() {
         .write_to_file(out_path.join("llamacpp.rs"))
         .expect("Couldn't write bindings!");
 
-    if let Some(pkg_cuda) = pkg_cuda {
-        pkg_config::Config::new().probe(pkg_cuda).unwrap();
-    }
-    pkg_config::Config::new().probe("llama").unwrap();
-
     inject_transient_dependencies(lib_search_path, lib_target_hardware);
 }