mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
* misc(cmake) update dependencies * feat(hardware) enable new hardware.hpp and unittests * test(ctest) enable address sanitizer * feat(backend): initial rewrite of the backend for simplicity * feat(backend): remove all the logs from hardware.hpp * feat(backend): added some logging * feat(backend): enable compiler warning if support for RVO not applying * feat(backend): missing return statement * feat(backend): introduce backend_workspace_t to store precomputed information from the engine folder * feat(backend): delete previous backend impl * feat(backend): more impl * feat(backend): use latest trtllm main version to have g++ >= 13 compatibility * feat(backend): allow overriding which Python to use * feat(backend): fix backend_exception_t -> backend_error_t naming * feat(backend): impl missing generation_step_t as return value of pull_tokens * feat(backend): make backend_workspace_t::engines_folder constexpr * feat(backend): fix main.rs retrieving the tokenizer * feat(backend): add guard to multiple header definitions * test(backend): add more unittest * feat(backend): remove constexpr from par * feat(backend): remove constexpig * test(backend): more test coverage * chore(trtllm): update dependency towards 0.15.0 * effectively cancel the request on the executor * feat(backend) fix moving backend when pulling * feat(backend): make sure we can easily cancel request on the executor * feat(backend): fix missing "0" field access * misc(backend): fix reborrowing Pin<&mut T> as described in the doc https://doc.rust-lang.org/stable/std/pin/struct.Pin.html#method.as_mut * chore: Add doc and CI for TRTLLM (#2799) * chore: Add doc and CI for TRTLLM * chore: Add doc and CI for TRTLLM * chore: Add doc and CI for TRTLLM * chore: Add doc and CI for TRTLLM * doc: Formatting * misc(backend): indent --------- Co-authored-by: Hugo Larcher <hugo.larcher@huggingface.co>
82 lines
3.8 KiB
C++
82 lines
3.8 KiB
C++
//
|
|
// Created by mfuntowicz on 11/16/24.
|
|
//
|
|
|
|
#include <catch2/catch_all.hpp>
|
|
#include "../csrc/hardware.hpp"
|
|
|
|
using namespace huggingface::tgi::hardware::cuda;
|
|
|
|
/**
 * Exercises the named `is_at_least_<arch>()` predicates of compute_capabilities_t.
 *
 * One fixture is built per architecture from a (major, minor) pair, ordered from
 * (7, 0) up to (9, 0). Each fixture has to satisfy the predicate of its own
 * architecture and of every architecture listed before it, and has to be rejected
 * by the predicates of every architecture listed after it.
 */
TEST_CASE("is_at_least_<arch>") {
    // (7, 0): only the Volta predicate holds.
    static const auto volta_caps = compute_capabilities_t(7, 0);
    REQUIRE(volta_caps.is_at_least_volta());
    REQUIRE_FALSE(volta_caps.is_at_least_turing());
    REQUIRE_FALSE(volta_caps.is_at_least_ampere());
    REQUIRE_FALSE(volta_caps.is_at_least_ada_lovelace());
    REQUIRE_FALSE(volta_caps.is_at_least_hopper());

    // (7, 5): Volta and Turing hold, everything after is rejected.
    static const auto turing_caps = compute_capabilities_t(7, 5);
    REQUIRE(turing_caps.is_at_least_volta());
    REQUIRE(turing_caps.is_at_least_turing());
    REQUIRE_FALSE(turing_caps.is_at_least_ampere());
    REQUIRE_FALSE(turing_caps.is_at_least_ada_lovelace());
    REQUIRE_FALSE(turing_caps.is_at_least_hopper());

    // (8, 0): holds up to Ampere; Ada Lovelace and Hopper are rejected.
    static const auto ampere_caps = compute_capabilities_t(8, 0);
    REQUIRE(ampere_caps.is_at_least_volta());
    REQUIRE(ampere_caps.is_at_least_turing());
    REQUIRE(ampere_caps.is_at_least_ampere());
    REQUIRE_FALSE(ampere_caps.is_at_least_ada_lovelace());
    REQUIRE_FALSE(ampere_caps.is_at_least_hopper());

    // (8, 9): holds up to Ada Lovelace; only Hopper is rejected.
    static const auto ada_lovelace_caps = compute_capabilities_t(8, 9);
    REQUIRE(ada_lovelace_caps.is_at_least_volta());
    REQUIRE(ada_lovelace_caps.is_at_least_turing());
    REQUIRE(ada_lovelace_caps.is_at_least_ampere());
    REQUIRE(ada_lovelace_caps.is_at_least_ada_lovelace());
    REQUIRE_FALSE(ada_lovelace_caps.is_at_least_hopper());

    // (9, 0): every predicate holds.
    static const auto hopper_caps = compute_capabilities_t(9, 0);
    REQUIRE(hopper_caps.is_at_least_volta());
    REQUIRE(hopper_caps.is_at_least_turing());
    REQUIRE(hopper_caps.is_at_least_ampere());
    REQUIRE(hopper_caps.is_at_least_ada_lovelace());
    REQUIRE(hopper_caps.is_at_least_hopper());
}
|
|
|
|
/**
 * Exercises the generic `is_at_least(<ARCH>)` predicate of compute_capabilities_t.
 *
 * The same five (major, minor) fixtures, from (7, 0) up to (9, 0), are compared
 * against the VOLTA, TURING, AMPERE, ADA_LOVELACE and HOPPER constants. Each
 * fixture has to be accepted for its own architecture and every one listed before
 * it, and rejected for every architecture listed after it.
 */
TEST_CASE("is_at_least") {
    // (7, 0): accepted for VOLTA only.
    static const auto volta_caps = compute_capabilities_t(7, 0);
    REQUIRE(volta_caps.is_at_least(VOLTA));
    REQUIRE_FALSE(volta_caps.is_at_least(TURING));
    REQUIRE_FALSE(volta_caps.is_at_least(AMPERE));
    REQUIRE_FALSE(volta_caps.is_at_least(ADA_LOVELACE));
    REQUIRE_FALSE(volta_caps.is_at_least(HOPPER));

    // (7, 5): accepted for VOLTA and TURING.
    static const auto turing_caps = compute_capabilities_t(7, 5);
    REQUIRE(turing_caps.is_at_least(VOLTA));
    REQUIRE(turing_caps.is_at_least(TURING));
    REQUIRE_FALSE(turing_caps.is_at_least(AMPERE));
    REQUIRE_FALSE(turing_caps.is_at_least(ADA_LOVELACE));
    REQUIRE_FALSE(turing_caps.is_at_least(HOPPER));

    // (8, 0): accepted up to AMPERE.
    static const auto ampere_caps = compute_capabilities_t(8, 0);
    REQUIRE(ampere_caps.is_at_least(VOLTA));
    REQUIRE(ampere_caps.is_at_least(TURING));
    REQUIRE(ampere_caps.is_at_least(AMPERE));
    REQUIRE_FALSE(ampere_caps.is_at_least(ADA_LOVELACE));
    REQUIRE_FALSE(ampere_caps.is_at_least(HOPPER));

    // (8, 9): accepted up to ADA_LOVELACE; only HOPPER is rejected.
    static const auto ada_lovelace_caps = compute_capabilities_t(8, 9);
    REQUIRE(ada_lovelace_caps.is_at_least(VOLTA));
    REQUIRE(ada_lovelace_caps.is_at_least(TURING));
    REQUIRE(ada_lovelace_caps.is_at_least(AMPERE));
    REQUIRE(ada_lovelace_caps.is_at_least(ADA_LOVELACE));
    REQUIRE_FALSE(ada_lovelace_caps.is_at_least(HOPPER));

    // (9, 0): accepted for every architecture constant.
    static const auto hopper_caps = compute_capabilities_t(9, 0);
    REQUIRE(hopper_caps.is_at_least(VOLTA));
    REQUIRE(hopper_caps.is_at_least(TURING));
    REQUIRE(hopper_caps.is_at_least(AMPERE));
    REQUIRE(hopper_caps.is_at_least(ADA_LOVELACE));
    REQUIRE(hopper_caps.is_at_least(HOPPER));
}