mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Fix Dockerfile
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
0d27ee74de
commit
4841f71a0e
@ -3,7 +3,6 @@ FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04 AS deps
|
||||
ARG llamacpp_version=b4651
|
||||
ARG llamacpp_cuda=OFF
|
||||
ARG cuda_arch=75-real;80-real;86-real;89-real;90-real
|
||||
ENV TGI_LLAMA_PKG_CUDA=cuda-${CUDA_VERSION%.*}
|
||||
|
||||
WORKDIR /opt/src
|
||||
|
||||
|
@ -31,10 +31,18 @@ impl ParseCallbacks for PrefixStripper {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let pkg_cuda = option_env!("TGI_LLAMA_PKG_CUDA");
|
||||
let lib_search_path = option_env!("TGI_LLAMA_LD_LIBRARY_PATH");
|
||||
let lib_target_hardware = option_env!("TGI_LLAMA_HARDWARE_TARGET").unwrap_or("cpu");
|
||||
|
||||
if let Some(cuda_version) = option_env!("CUDA_VERSION") {
|
||||
let mut version: Vec<&str> = cuda_version.split('.').collect();
|
||||
if version.len() > 2 {
|
||||
version.pop();
|
||||
}
|
||||
pkg_config::Config::new().probe(&version.join(".")).unwrap();
|
||||
}
|
||||
pkg_config::Config::new().probe("llama").unwrap();
|
||||
|
||||
let bindings = bindgen::Builder::default()
|
||||
.header("src/wrapper.h")
|
||||
.prepend_enum_name(false)
|
||||
@ -48,10 +56,5 @@ fn main() {
|
||||
.write_to_file(out_path.join("llamacpp.rs"))
|
||||
.expect("Couldn't write bindings!");
|
||||
|
||||
if let Some(pkg_cuda) = pkg_cuda {
|
||||
pkg_config::Config::new().probe(pkg_cuda).unwrap();
|
||||
}
|
||||
pkg_config::Config::new().probe("llama").unwrap();
|
||||
|
||||
inject_transient_dependencies(lib_search_path, lib_target_hardware);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user