misc(cmake): add parameter to build specific cuda arch

Morgan Funtowicz 2024-10-18 17:10:22 +02:00
parent 52d57dca79
commit e4432d36b1

@@ -1,11 +1,12 @@
-cmake_minimum_required(VERSION 3.20)
+cmake_minimum_required(VERSION 3.24)
 project(tgi-llama-cpp-backend VERSION 1.0.0)
 
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 23)
 
 include(FetchContent)
 
-set(LLAMA_CPP_TARGET_VERSION "b3837" STRING "Version of llama.cpp to build against")
+set(LLAMA_CPP_TARGET_VERSION "b3837" CACHE STRING "Version of llama.cpp to build against")
+set(LLAMA_CPP_TARGET_CUDA_ARCHS "75-real;80-real;86-real;89-real;90-real" CACHE STRING "CUDA arch(s) to build")
 
 option(LLAMA_CPP_BUILD_OFFLINE_RUNNER "Flag to build the standalone c++ backend runner")
 option(LLAMA_CPP_BUILD_CUDA "Flag to build CUDA enabled inference through llama.cpp")
@@ -13,18 +14,22 @@ option(LLAMA_CPP_BUILD_CUDA "Flag to build CUDA enabled inference through llama.cpp")
 include(cmake/fmt.cmake)
 include(cmake/spdlog.cmake)
 
-if(${LLAMA_CPP_BUILD_CUDA})
+if (${LLAMA_CPP_BUILD_CUDA})
     message(STATUS "Enabling llama.cpp CUDA support")
+
+    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+        set(CMAKE_CUDA_ARCHITECTURES ${LLAMA_CPP_TARGET_CUDA_ARCHS})
+    endif ()
     set(GGML_CUDA ON)
-endif()
+endif ()
 
 # Download llama.cpp repo at the specific version
 fetchcontent_declare(
         llama
         # DOWNLOAD_EXTRACT_TIMESTAMP TRUE
         GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
         GIT_TAG b3837
         GIT_SHALLOW FALSE
 )
 
 fetchcontent_makeavailable(llama)
@@ -33,10 +38,10 @@ add_library(tgi_llama_cpp_backend_impl STATIC csrc/backend.hpp csrc/backend.cpp)
 
 target_compile_features(tgi_llama_cpp_backend_impl PRIVATE cxx_std_11)
 target_link_libraries(tgi_llama_cpp_backend_impl PUBLIC fmt::fmt spdlog::spdlog llama common)
 
-if(${LLAMA_CPP_BUILD_OFFLINE_RUNNER})
+if (${LLAMA_CPP_BUILD_OFFLINE_RUNNER})
     message(STATUS "Building llama.cpp offline runner")
     add_executable(tgi_llama_cpp_offline_runner offline/main.cpp)
     target_link_libraries(tgi_llama_cpp_offline_runner tgi_llama_cpp_backend_impl)
-endif()
+endif ()
 
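
For reference, the new cache variable can be overridden at configure time. A minimal sketch of such an invocation is shown below; the build directory and the single "86-real" architecture are illustrative values, not taken from this commit:

    # Hypothetical configure/build commands: restrict the CUDA build to one
    # architecture instead of the default "75-real;80-real;86-real;89-real;90-real" list.
    cmake -B build -S . \
        -DLLAMA_CPP_BUILD_CUDA=ON \
        -DLLAMA_CPP_TARGET_CUDA_ARCHS="86-real"
    cmake --build build --parallel

Because CMAKE_CUDA_ARCHITECTURES is only assigned when it is not already defined, passing -DCMAKE_CUDA_ARCHITECTURES=... directly still takes precedence over LLAMA_CPP_TARGET_CUDA_ARCHS.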