text-generation-inference/backends/trtllm/build.rs

use std::env;
use std::path::PathBuf;

use cxx_build::CFG;
use pkg_config;
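
// Compile-time configuration: `option_env!` captures these variables when the
// build script itself is compiled, e.g. (values are illustrative):
//   CUDA_ARCH_LIST="90-real" TENSORRT_ROOT_DIR=/usr/local/tensorrt cargo build --release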
const ADDITIONAL_BACKEND_LINK_LIBRARIES: [&str; 2] = ["spdlog", "fmt"];
const CUDA_ARCH_LIST: Option<&str> = option_env!("CUDA_ARCH_LIST");
const CUDA_REQUIRED_VERSION: &str = "12.4";
const MPI_REQUIRED_VERSION: &str = "4.1";
const INSTALL_PREFIX: Option<&str> = option_env!("CMAKE_INSTALL_PREFIX");
const TENSORRT_ROOT_DIR: Option<&str> = option_env!("TENSORRT_ROOT_DIR");
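
// Try pkg-config with the bare module name first (e.g. "cuda"), then fall
// back to the versioned module (e.g. "cuda-12.4") before giving up.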
macro_rules! probe {
    ($name: literal, $version: expr) => {
        if pkg_config::probe_library($name).is_err() {
            // Fall back to the versioned module name (e.g. "cuda-12.4");
            // hardcoding a "cuda-" prefix here would break the MPI probe below
            pkg_config::probe_library(&format!("{}-{}", $name, $version))
                .expect(&format!("Failed to locate {}", $name));
        }
    };
}

fn main() {
    // Misc variables provided by Cargo at build time
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
    let build_profile = env::var("PROFILE").unwrap();
    let is_debug = matches!(build_profile.as_ref(), "debug");
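
    // Cargo's "debug" profile maps to CMake's Debug build type below; any
    // other profile (e.g. "release") maps to Release.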

    // Build the backend implementation through CMake
    let backend_path = cmake::Config::new(".")
        .uses_cxx11()
        .generator("Ninja")
        .profile(match is_debug {
            true => "Debug",
            false => "Release",
        })
        .env("OPT_LEVEL", "3")
        .out_dir(INSTALL_PREFIX.unwrap_or("/usr/local/tgi"))
        .define("CMAKE_CUDA_COMPILER", "/usr/local/cuda/bin/nvcc")
        .define(
            "TGI_TRTLLM_BACKEND_TARGET_CUDA_ARCH_LIST",
            CUDA_ARCH_LIST.unwrap_or("90-real"), // Hopper by default
        )
        .define(
            "TGI_TRTLLM_BACKEND_TRT_ROOT",
            TENSORRT_ROOT_DIR.unwrap_or("/usr/local/tensorrt"),
        )
        .build();
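
    // NOTE: `backend_path` is the install prefix configured above; it is
    // reused further down as a native link-search path for the built backend.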

    // Additional transitive CMake dependencies, fetched by FetchContent into
    // `<OUT_DIR>/build/_deps` and linked statically from their build trees
    let deps_folder = out_dir.join("build").join("_deps");
    for dependency in ADDITIONAL_BACKEND_LINK_LIBRARIES {
        // Debug builds of spdlog/fmt are expected to carry a "d" suffix
        // (their CMake DEBUG_POSTFIX)
        let dep_name = match build_profile.as_ref() {
            "debug" => format!("{}d", dependency),
            _ => String::from(dependency),
        };
        let dep_path = deps_folder.join(format!("{}-build", dependency));
        println!("cargo:rustc-link-search={}", dep_path.display());
        println!("cargo:rustc-link-lib=static={}", dep_name);
    }

    // Build the FFI layer calling the backend above. `CFG.include_prefix`
    // controls the path under which cxx exposes the generated headers to C++,
    // i.e. #include "backends/trtllm/...".
    CFG.include_prefix = "backends/trtllm";
    cxx_build::bridge("src/lib.rs")
        .static_flag(true)
        .include(deps_folder.join("fmt-src").join("include"))
        .include(deps_folder.join("spdlog-src").join("include"))
        .include(deps_folder.join("json-src").join("include"))
        .include(deps_folder.join("trtllm-src").join("cpp").join("include"))
        .include("/usr/local/cuda/include")
        .include("/usr/local/tensorrt/include")
        .file("src/ffi.cpp")
        .std("c++20")
        .compile("tgi_trtllm_backend");
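
    // Ask Cargo to re-run this script whenever the native sources or the
    // CMake recipe change.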
println!("cargo:rerun-if-changed=CMakeLists.txt");
println!("cargo:rerun-if-changed=include/backend.h");
println!("cargo:rerun-if-changed=lib/backend.cpp");
println!("cargo:rerun-if-changed=include/ffi.h");
println!("cargo:rerun-if-changed=src/ffi.cpp");

    // Probe MPI with pkg-config; successful probes emit the required
    // cargo:rustc-link-search / cargo:rustc-link-lib directives themselves
    probe!("ompi", MPI_REQUIRED_VERSION);

    // Probe CUDA & co. with pkg-config
    probe!("cuda", CUDA_REQUIRED_VERSION);
    probe!("cudart", CUDA_REQUIRED_VERSION);
    probe!("cublas", CUDA_REQUIRED_VERSION);
    probe!("nvidia-ml", CUDA_REQUIRED_VERSION);

    // TensorRT
    println!(r"cargo:rustc-link-search=native=/usr/local/tensorrt/lib");
    println!(r"cargo:rustc-link-search=native={}", backend_path.display());

    // TensorRT-LLM
    println!(
        r"cargo:rustc-link-search=native={}",
        backend_path.join("lib").display()
    );
    println!("cargo:rustc-link-lib=dylib=tensorrt_llm");
    println!("cargo:rustc-link-lib=static=tensorrt_llm_executor_static");
    println!("cargo:rustc-link-lib=dylib=nvinfer_plugin_tensorrt_llm");
    println!("cargo:rustc-link-lib=dylib=tensorrt_llm_nvrtc_wrapper");

    // Backend
    println!("cargo:rustc-link-lib=static=tgi_trtllm_backend_impl");
    println!("cargo:rustc-link-lib=static=tgi_trtllm_backend");
}