mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-05-04 16:52:06 +00:00)

feat(tgi_common): add initial set of common functions for reuse
parent: 31a6065fac
commit: 5f9120da9c

@@ -16,16 +16,16 @@ option(TGI_BUILD_CCL "Flag to enable/disable build of tgiccl collective library"

# Add some modules
include(FetchContent)
include(cmake/fmt.cmake)
include(cmake/spdlog.cmake)

# Let's find LibTorch
include(cmake/torch.cmake)
find_package(Python3 COMPONENTS Interpreter)
ProbeForPyTorchInstall()
ConfigurePyTorch()

find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
find_package(Python3 COMPONENTS Interpreter)

# TGI common
add_subdirectory(common)

# Include submodules
if (${TGI_BUILD_CCL})

csrc/cmake/fmt.cmake (new file, 6 lines)
@@ -0,0 +1,6 @@
FetchContent_Declare(
        fmt
        GIT_REPOSITORY https://github.com/fmtlib/fmt
        GIT_TAG 11.0.1
)
FetchContent_MakeAvailable(fmt)

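fmt 11.0.1 is fetched here because both spdlog (SPDLOG_FMT_EXTERNAL below) and the new tgi_common target link against fmt::fmt. As a hedged illustration only (not part of this commit), FMT_STRING gives compile-time checking of format strings, which is why the tgiccl change further down wraps its SPDLOG_ERROR format in it:

// Hypothetical usage sketch, not part of this commit.
#include <fmt/format.h>

int main() {
    // FMT_STRING validates the format string at compile time:
    // one {:d} placeholder, one integral argument.
    fmt::print(FMT_STRING("device index = {:d}\n"), 3);
    // A mismatched placeholder/argument pair would be rejected at compile time.
    return 0;
}
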
@@ -1,6 +1,17 @@
fetchcontent_declare(
        spdlog
        URL https://github.com/gabime/spdlog/archive/refs/tags/v1.14.1.tar.gz
)
set(SPDLOG_USE_FMT ON)
set(SPDLOG_BUILD_SHARED OFF)
set(SPDLOG_FMT_EXTERNAL ON)

# Define the compile-time log level (SPDLOG_ACTIVE_LEVEL) based on the build type
if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
    add_compile_definitions(SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_DEBUG)
else ()
    add_compile_definitions(SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_INFO)
endif ()

fetchcontent_declare(
        spdlog
        GIT_REPOSITORY https://github.com/gabime/spdlog.git
        GIT_TAG v1.14.1
)
fetchcontent_makeavailable(spdlog)

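The Debug/Release branch above is meant to pin the compile-time log level. A minimal sketch of what that means for callers (an assumption for illustration, not part of this commit): with SPDLOG_ACTIVE_LEVEL at SPDLOG_LEVEL_INFO, SPDLOG_DEBUG(...) statements expand to no-ops and disappear from Release builds.

// Hypothetical sketch, not part of this commit.
#include <spdlog/spdlog.h>

int main() {
    // Compiled out entirely when SPDLOG_ACTIVE_LEVEL >= SPDLOG_LEVEL_INFO.
    SPDLOG_DEBUG("p2p probe details: {}", 42);
    // Always compiled in at INFO level.
    SPDLOG_INFO("tgi_common initialised");
    return 0;
}
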
@@ -1,148 +1,7 @@
# ProbeForPyTorchInstall
# Attempts to find a Torch installation and set the Torch_ROOT variable
# based on introspecting the python environment. This allows a subsequent
# call to find_package(Torch) to work.
function(ProbeForPyTorchInstall)
    if (Torch_ROOT)
        message(STATUS "Using cached Torch root = ${Torch_ROOT}")
    else ()
        message(STATUS "Checking for PyTorch using ${Python3_EXECUTABLE} ...")
        execute_process(
                COMMAND ${Python3_EXECUTABLE}
                -c "import os;import torch;print(torch.utils.cmake_prefix_path, end='')"
                WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                RESULT_VARIABLE PYTORCH_STATUS
                OUTPUT_VARIABLE PYTORCH_PACKAGE_DIR)
        if (NOT PYTORCH_STATUS EQUAL "0")
            message(STATUS "Unable to 'import torch' with ${Python3_EXECUTABLE} (fallback to explicit config)")
            return()
        endif ()
        message(STATUS "Found PyTorch installation at ${PYTORCH_PACKAGE_DIR}")

        set(Torch_ROOT "${PYTORCH_PACKAGE_DIR}" CACHE STRING
                "Torch configure directory" FORCE)
    endif ()
endfunction()

# ConfigurePyTorch
# Extensions compiled against PyTorch must be ABI-compatible with PyTorch.
# On Linux, there are two components to this:
#   1) Dual ABI settings for libstdc++
#      See https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
#      For this, PyTorch helpfully provides a function to check which ABI it was
#      compiled against.
#   2) C++ ABI compatibility version
#      See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html (Sec 5/6)
# The second is a bit more complicated. GCC has official compatibility strings
# which can be specified by -fabi-version. Clang has no notion of ABI
# versioning (https://lists.llvm.org/pipermail/cfe-dev/2015-June/043735.html).
# Separately, pybind11 keeps an internal variable which records its ABI info
# (PYBIND11_INTERNALS_ID in include/pybind11/detail/internals.h). Differences
# in this variable between torch-mlir and PyTorch will cause type errors.
# Thus, our best option is to:
#   a) Identify which ABI version PyTorch was compiled with
#   b) Tell gcc to use that version
#   or
#   c) Tell clang to pretend to use it and hope it's ABI-compatible, and
#      tell pybind to pretend we're gcc.
#
# MacOS does not have a dual ABI problem.
# FIXME: I don't know if MacOS needs ABI compatibility version flags.
#
# In the future, we may want to switch away from custom building these
# extensions and instead rely on the Torch machinery directly (definitely want
# to do that for official builds).
function(ConfigurePyTorch)
    message(STATUS "Checking PyTorch ABI settings...")
    if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
        # Check dual ABI setting first
        execute_process(
                COMMAND ${Python3_EXECUTABLE}
                -c "import torch; import sys; sys.stdout.write('1' if torch.compiled_with_cxx11_abi() else '0')"
                RESULT_VARIABLE _result
                WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                OUTPUT_VARIABLE _use_cxx11_abi)
        if (_result)
            message(FATAL_ERROR "Failed to determine C++ Dual ABI: ${Python3_EXECUTABLE} -> ${_result}")
        endif ()
        message(STATUS "PyTorch C++ Dual ABI setting: \"${_use_cxx11_abi}\"")

        # Check ABI compatibility version
        execute_process(
                COMMAND ${Python3_EXECUTABLE}
                -c "import torch; import sys; abi=torch._C._PYBIND11_BUILD_ABI; abi.startswith('_cxxabi10') or sys.exit(1); sys.stdout.write(str(abi[-2:]))"
                RESULT_VARIABLE _result
                WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                OUTPUT_VARIABLE _cxx_abi_version)
        if (_result)
            message(FATAL_ERROR "Failed to determine C++ ABI version")
        endif ()
        message(STATUS "PyTorch C++ ABI version: \"${_cxx_abi_version}\"")

        # Specialize compile flags for compiler
        if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
            set(TORCH_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=${_use_cxx11_abi} -fabi-version=${_cxx_abi_version}")
        elseif (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
            set(TORCH_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=${_use_cxx11_abi} -U__GXX_ABI_VERSION -D__GXX_ABI_VERSION=10${_cxx_abi_version} '-DPYBIND11_COMPILER_TYPE=\"_gcc\"'")
        else ()
            message(WARNING "Unrecognized compiler. Cannot determine ABI flags.")
            return()
        endif ()
        set(TORCH_CXXFLAGS "${TORCH_CXXFLAGS}" PARENT_SCOPE)
    endif ()
endfunction()

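ConfigurePyTorch above mirrors PyTorch's _GLIBCXX_USE_CXX11_ABI and -fabi-version so the extension links cleanly against it. As a hedged illustration of what the dual-ABI macro controls (not part of this commit), a translation unit can report which libstdc++ std::string ABI it was built against:

// Hypothetical sketch: _GLIBCXX_USE_CXX11_ABI selects between libstdc++'s old
// (copy-on-write) and new (std::__cxx11) std::string layouts; mixing objects
// built with different values produces undefined references at link time.
#include <cstdio>
#include <string>

int main() {
#if defined(_GLIBCXX_USE_CXX11_ABI) && _GLIBCXX_USE_CXX11_ABI
    std::puts("built against the cxx11 (new) libstdc++ ABI");
#else
    std::puts("built against the pre-cxx11 (old) ABI, or not using libstdc++");
#endif
    return 0;
}
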
function(ConfigureLibTorch)
    message(STATUS "Checking LibTorch ABI settings...")
    if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
        message(STATUS "libtorch_python is ${TORCH_INSTALL_PREFIX}/lib/libtorch_python.so")
        # Check dual ABI setting first
        execute_process(
                COMMAND bash "-c" "cat ${TORCH_INSTALL_PREFIX}/share/cmake/Torch/TorchConfig.cmake | egrep -o '_GLIBCXX_USE_CXX11_ABI=[0-1]' | egrep -o '.$'"
                RESULT_VARIABLE _result
                OUTPUT_VARIABLE _use_cxx11_abi
                OUTPUT_STRIP_TRAILING_WHITESPACE)
        if (_result)
            message(FATAL_ERROR "Failed to determine LibTorch C++ Dual ABI")
        endif ()
        message(STATUS "LibTorch C++ Dual ABI setting: \"${_use_cxx11_abi}\"")

        # Check ABI compatibility version
        execute_process(
                COMMAND bash "-c" "strings ${TORCH_INSTALL_PREFIX}/lib/libtorch_python.so | egrep '^_cxxabi[0-9]{4}' | egrep -o '..$'"
                RESULT_VARIABLE _result
                OUTPUT_VARIABLE _cxx_abi_version
                OUTPUT_STRIP_TRAILING_WHITESPACE)
        if (_result)
            message(FATAL_ERROR "Failed to determine LibTorch C++ ABI version")
        endif ()
        message(STATUS "LibTorch C++ ABI version: \"${_cxx_abi_version}\"")

        # Specialize compile flags for compiler
        if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
            set(TORCH_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=${_use_cxx11_abi} -fabi-version=${_cxx_abi_version}")
        elseif (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
            set(TORCH_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=${_use_cxx11_abi} -U__GXX_ABI_VERSION -D__GXX_ABI_VERSION=10${_cxx_abi_version} '-DPYBIND11_COMPILER_TYPE=\"_gcc\"'")
        else ()
            message(WARNING "Unrecognized compiler. Cannot determine ABI flags.")
            return()
        endif ()
        set(TORCH_CXXFLAGS "${TORCH_CXXFLAGS}" PARENT_SCOPE)
    endif ()
endfunction()

function(torch_mlir_python_target_compile_options target)
    target_compile_options(${target} PRIVATE
            $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
            # Enable RTTI and exceptions.
            -frtti -fexceptions
            # Noisy pybind warnings
            -Wno-unused-value
            -Wno-covered-switch-default
            >
            $<$<CXX_COMPILER_ID:MSVC>:
            # Enable RTTI and exceptions.
            /EHsc /GR>
    )
endfunction()
fetchcontent_declare(
        Torch
        URL https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.4.1%2Bcu124.zip
        # OVERRIDE_FIND_PACKAGE
)
FetchContent_MakeAvailable(Torch)
list(APPEND CMAKE_PREFIX_PATH ${Torch_SOURCE_DIR})

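The probing helpers above are replaced by fetching a prebuilt cu124 LibTorch archive. A small smoke test one might compile against the fetched package to confirm that find_package(Torch) and the ABI flags line up (hypothetical, not part of this commit):

// Hypothetical LibTorch smoke test; links only if the fetched Torch package and
// the TORCH_CXX_FLAGS / libstdc++ ABI settings are consistent.
#include <iostream>
#include <torch/torch.h>

int main() {
    const auto t = torch::ones({2, 2});
    std::cout << "CUDA available: " << std::boolalpha
              << torch::cuda::is_available() << '\n';
    std::cout << "sum = " << t.sum().item<float>() << '\n';   // prints 4
    return 0;
}
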
csrc/common/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@

set(TGI_COMMON_HEADERS include/common/device.hpp)
set(TGI_COMMON_SOURCES lib/device.cpp)

add_library(tgi_common SHARED ${TGI_COMMON_HEADERS} ${TGI_COMMON_SOURCES})
target_link_libraries(tgi_common fmt::fmt spdlog::spdlog ${TORCH_LIBRARIES})

target_include_directories(tgi_common PRIVATE
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/common>
        $<INSTALL_INTERFACE:include>
)

target_include_directories(tgi_common PUBLIC
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
        $<INSTALL_INTERFACE:include/>
)

csrc/common/include/common/device.hpp (new file, 35 lines)
@@ -0,0 +1,35 @@
//
// Created by morgan on 27/09/24.
//

#ifndef TGI_DEVICE_HPP
#define TGI_DEVICE_HPP
#include <cstdint>
#include <nvml.h>
#include <optional>

namespace huggingface::tgi {
    using device_index_t = uint8_t;

    /**
     * Attempt to retrieve the referred GPU by its index on the system
     * @param device Device index
     * @return
     */
    std::optional<nvmlDevice_t> GetDeviceByIndex(device_index_t device);

    /**
     * Check whether all the GPUs have direct remote memory access to each other
     */
    bool IsP2PComplete();

    /**
     * Check if GPU "from" has remote memory access to GPU "to"
     * @param from Originating GPU memory
     * @param to Destination GPU memory
     * @return True if p2p is available, false otherwise
     */
    bool IsP2PAvailable(device_index_t from, device_index_t to);
}

#endif // TGI_DEVICE_HPP

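A short caller's sketch of the new header (hypothetical, not part of this commit); it initialises NVML itself, since the header does not do so:

// Hypothetical caller of the tgi_common device API.
#include <cstdio>
#include <nvml.h>
#include "common/device.hpp"

int main() {
    nvmlInit_v2();
    using huggingface::tgi::GetDeviceByIndex;
    using huggingface::tgi::IsP2PAvailable;
    using huggingface::tgi::IsP2PComplete;

    if (const auto dev = GetDeviceByIndex(0); dev.has_value()) {
        std::printf("device 0 found, p2p 0<->1: %d, all-pairs p2p: %d\n",
                    IsP2PAvailable(0, 1), IsP2PComplete());
    } else {
        std::printf("device 0 not visible through NVML\n");
    }
    nvmlShutdown();
    return 0;
}
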
csrc/common/lib/device.cpp (new file, 20 lines)
@@ -0,0 +1,20 @@
//
// Created by morgan on 27/09/24.
//

#include "device.hpp"

std::optional<nvmlDevice_t> huggingface::tgi::GetDeviceByIndex(device_index_t device)
{
    return std::nullopt;
}

bool huggingface::tgi::IsP2PComplete()
{
    return false;
}

bool huggingface::tgi::IsP2PAvailable(device_index_t from, device_index_t to)
{
    return false;
}

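The three functions above are stubs for now (they return std::nullopt / false). A hedged sketch of how GetDeviceByIndex could later be filled in with NVML; this is an assumption for illustration, not what the commit ships:

// Hypothetical follow-up implementation of GetDeviceByIndex.
#include <mutex>
#include <nvml.h>
#include <optional>
#include "device.hpp"

namespace {
    // Assumed one-shot initialisation, mirroring ENSURE_NVML_INIT() in tgiccl.
    std::once_flag nvmlInitFlag;
}

std::optional<nvmlDevice_t> huggingface::tgi::GetDeviceByIndex(device_index_t device)
{
    std::call_once(nvmlInitFlag, nvmlInit_v2);

    nvmlDevice_t handle;
    if (nvmlDeviceGetHandleByIndex_v2(static_cast<unsigned int>(device), &handle) != NVML_SUCCESS)
        return std::nullopt;
    return handle;
}
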
@@ -6,7 +6,7 @@ set(TGICCL_SOURCES TgiCclBackend.cpp)
find_package(CUDAToolkit REQUIRED)

add_library(tgiccl SHARED ${TGICCL_HEADERS} ${TGICCL_SOURCES})
target_link_libraries(tgiccl PUBLIC spdlog::spdlog CUDA::nvml ${TORCH_LIBRARIES})
target_link_libraries(tgiccl PUBLIC tgi_common fmt::fmt spdlog::spdlog CUDA::nvml ${TORCH_LIBRARIES})

add_executable(test_tgiccl test_tgiccl.cpp)
target_link_libraries(test_tgiccl tgiccl spdlog::spdlog)
target_link_libraries(test_tgiccl PUBLIC tgiccl fmt::fmt spdlog::spdlog ${TORCH_LIBRARIES})

@@ -7,5 +7,5 @@
int main() {
    auto a = huggingface::tgi::tgiccl::IsNvLinkAvailable(0, 1);
    auto b = huggingface::tgi::tgiccl::IsNvLinkAvailable(0, 2);
    auto c = huggingface::tgi::tgiccl::IsNvLinkAvailable(0, 3);
    auto d = huggingface::tgi::tgiccl::IsNvLinkAvailable(0, 3);
}

@@ -13,8 +13,8 @@

constexpr auto CLL_BACKEND_NAME = "tgiccl";

namespace huggingface::tgi::tgiccl {

namespace huggingface::tgi::tgiccl
{
    static std::once_flag NVML_INIT_FLAG;
#define ENSURE_NVML_INIT() std::call_once(NVML_INIT_FLAG, nvmlInit_v2);

@@ -46,7 +46,10 @@ namespace huggingface::tgi::tgiccl {
    // Query link between both
    nvmlGpuP2PStatus_t status;
    if(nvmlDeviceGetP2PStatus(devFrom.value(), devTo.value(), NVML_P2P_CAPS_INDEX_NVLINK, &status) != NVML_SUCCESS)
    {
        SPDLOG_ERROR(FMT_STRING("Failed to retrieve the p2p status for device {:d} <-> {:d}"), from, to);
        return false;
    }

    return status == NVML_P2P_STATUS_OK;
}

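The tgiccl query above checks a single NVLink pair. A hedged sketch (an assumption, not in this commit) of how the IsP2PComplete() declared in tgi_common could iterate the same kind of check over every device pair:

// Hypothetical sketch of IsP2PComplete(): true only if every ordered pair of
// visible GPUs is reachable via peer-to-peer. Assumes NVML is initialised and
// that IsP2PAvailable() wraps nvmlDeviceGetP2PStatus as in the snippet above.
#include <nvml.h>
#include "device.hpp"

bool huggingface::tgi::IsP2PComplete()
{
    unsigned int count = 0;
    if (nvmlDeviceGetCount_v2(&count) != NVML_SUCCESS)
        return false;

    for (unsigned int from = 0; from < count; ++from)
        for (unsigned int to = 0; to < count; ++to)
            if (from != to &&
                !IsP2PAvailable(static_cast<device_index_t>(from),
                                static_cast<device_index_t>(to)))
                return false;
    return true;
}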