Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-22 23:42:06 +00:00)
misc(cmake): add parameter to build specific cuda arch
parent 52d57dca79
commit e4432d36b1
@@ -1,11 +1,12 @@
-cmake_minimum_required(VERSION 3.20)
+cmake_minimum_required(VERSION 3.24)
 
 project(tgi-llama-cpp-backend VERSION 1.0.0)
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 23)
 
 include(FetchContent)
 
-set(LLAMA_CPP_TARGET_VERSION "b3837" STRING "Version of llama.cpp to build against")
+set(LLAMA_CPP_TARGET_VERSION "b3837" CACHE STRING "Version of llama.cpp to build against")
+set(LLAMA_CPP_TARGET_CUDA_ARCHS "75-real;80-real;86-real;89-real;90-real" CACHE STRING "CUDA arch(s) to build")
 option(LLAMA_CPP_BUILD_OFFLINE_RUNNER "Flag to build the standalone c++ backend runner")
 option(LLAMA_CPP_BUILD_CUDA "Flag to build CUDA enabled inference through llama.cpp")
 
@@ -15,6 +16,10 @@ include(cmake/spdlog.cmake)
 
 if (${LLAMA_CPP_BUILD_CUDA})
     message(STATUS "Enabling llama.cpp CUDA support")
+
+    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+        set(CMAKE_CUDA_ARCHITECTURES ${LLAMA_CPP_TARGET_CUDA_ARCHS})
+    endif ()
     set(GGML_CUDA ON)
 endif ()
 
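For context, a minimal sketch (not part of the commit) of how the new cache variable could be overridden at configure time, using standard CMake -D syntax; the build directory name is illustrative:

    # Build only for compute capability 8.6 (real arch, no PTX fallback)
    cmake -S . -B build \
          -DLLAMA_CPP_BUILD_CUDA=ON \
          -DLLAMA_CPP_TARGET_CUDA_ARCHS="86-real"

Because the new guard only applies the default when CMAKE_CUDA_ARCHITECTURES is not already defined, passing -DCMAKE_CUDA_ARCHITECTURES=... directly still takes precedence over LLAMA_CPP_TARGET_CUDA_ARCHS.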