//
// Created by mfuntowicz on 12/3/24.
//
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <list>
#include <ranges>
#include <vector>

#include <catch2/catch_all.hpp>
#include <nlohmann/json.hpp>
#include <tensorrt_llm/executor/executor.h>

#include "backend.hpp"

using namespace huggingface::tgi::backends::trtllm;

TEST_CASE("parse generation_config.json all set", "[generation_config_t]")
{
    const json config_j = {{"temperature", 0.6},
                           {"top_p", 0.95},
                           {"eos_token_id", {1, 2, 3}}};
    const auto generation_config = generation_config_t(config_j);

    REQUIRE_THAT(generation_config.temperature, Catch::Matchers::WithinAbs(0.6, 1e-6));
    REQUIRE_THAT(generation_config.top_p, Catch::Matchers::WithinAbs(0.95, 1e-6));

    // Stop words
    REQUIRE_FALSE(generation_config.stop_words.empty());
    REQUIRE(generation_config.stop_words.size() == config_j["/eos_token_id"_json_pointer].size());

    for (auto [lhs, rhs]: std::views::zip(generation_config.stop_words,
                                          std::list<std::vector<int32_t>>{{1}, {2}, {3}})) {
        // Currently we do not support multi-token stop words
        REQUIRE(lhs.size() == 1);
        REQUIRE(rhs.size() == 1);
        REQUIRE_THAT(lhs, Catch::Matchers::UnorderedEquals(rhs));
    }
}

TEST_CASE("parse generation_config.json default", "[generation_config_t]")
{
    const json config_j = {{"eos_token_id", {1, 2, 3}}};
    const auto generation_config = generation_config_t(config_j);

    REQUIRE_THAT(generation_config.temperature, Catch::Matchers::WithinAbs(1.0, 1e-6));
    REQUIRE_THAT(generation_config.top_p, Catch::Matchers::WithinAbs(1.0, 1e-6));

    REQUIRE_FALSE(generation_config.stop_words.empty());
    REQUIRE(generation_config.stop_words.size() == config_j["/eos_token_id"_json_pointer].size());

    for (auto [lhs, rhs]: std::views::zip(generation_config.stop_words,
                                          std::list<std::vector<int32_t>>{{1}, {2}, {3}})) {
        // Currently we do not support multi-token stop words
        REQUIRE(lhs.size() == 1);
        REQUIRE(rhs.size() == 1);
        REQUIRE_THAT(lhs, Catch::Matchers::UnorderedEquals(rhs));
    }
}

TEST_CASE("parse generation_config.json empty", "[generation_config_t]")
{
    const json config_j = {{"eos_token_id", {}}};
    const auto generation_config = generation_config_t(config_j);

    REQUIRE_THAT(generation_config.temperature, Catch::Matchers::WithinAbs(1.0, 1e-6));
    REQUIRE_THAT(generation_config.top_p, Catch::Matchers::WithinAbs(1.0, 1e-6));

    REQUIRE(generation_config.stop_words.empty());

    const json config_j2 = {};
    const auto generation_config2 = generation_config_t(config_j2);

    REQUIRE_THAT(generation_config2.temperature, Catch::Matchers::WithinAbs(1.0, 1e-6));
    REQUIRE_THAT(generation_config2.top_p, Catch::Matchers::WithinAbs(1.0, 1e-6));

    REQUIRE(generation_config2.stop_words.empty());
}

TEST_CASE("parallel_config multi", "[backend_workspace_t]")
{
    // Generate temporary folder
    const auto tmp_p = std::filesystem::temp_directory_path();
    const auto config_p = tmp_p / "config.json";
    const auto generation_config_p = tmp_p / "generation_config.json";

    // Generate content
    std::ofstream o_config(config_p);
    o_config << R"({"pretrained_config": {"mapping": {"world_size": 2}}})"_json;
    o_config.close();

    std::ofstream o_generation_config(generation_config_p);
    o_generation_config << R"({"eos_token_id": []})"_json;
    o_generation_config.close();

    // With world_size > 1, the workspace should expose an orchestrator-mode parallel config
    const auto workspace = backend_workspace_t(tmp_p.generic_string(), tmp_p.generic_string());
    const auto parallel = workspace.parallel_config();
    REQUIRE(parallel.getCommunicationMode() == tle::CommunicationMode::kORCHESTRATOR);
    REQUIRE(parallel.getCommunicationType() == tle::CommunicationType::kMPI);

    std::filesystem::remove(config_p);
    std::filesystem::remove(generation_config_p);
}

TEST_CASE("parallel_config single", "[backend_workspace_t]")
{
    // Generate temporary folder
    const auto tmp_p = std::filesystem::temp_directory_path();
    const auto config_p = tmp_p / "config.json";
    const auto generation_config_p = tmp_p / "generation_config.json";

    // Generate content
    std::ofstream o_config(config_p);
    o_config << R"({"pretrained_config": {"mapping": {"world_size": 1}}})"_json;
    o_config.close();

    std::ofstream o_generation_config(generation_config_p);
    o_generation_config << R"({"eos_token_id": []})"_json;
    o_generation_config.close();

    // With world_size == 1, the workspace should fall back to leader communication mode
    const auto workspace = backend_workspace_t(tmp_p.generic_string(), tmp_p.generic_string());
    const auto parallel = workspace.parallel_config();
    REQUIRE(parallel.getCommunicationMode() == tle::CommunicationMode::kLEADER);
    REQUIRE(parallel.getCommunicationType() == tle::CommunicationType::kMPI);

    std::filesystem::remove(config_p);
    std::filesystem::remove(generation_config_p);
}

TEST_CASE("executor_config", "[backend_workspace_t]")
{

}

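// The "executor_config" test case above is still an empty stub. The sketch below is one
// possible way to flesh it out, under the assumption that backend_workspace_t exposes an
// executor_config() accessor (mirroring parallel_config()) returning a tle::ExecutorConfig
// built from the same config.json / generation_config.json pair used in the tests above.
// Which fields are worth asserting depends on what the backend actually populates; the
// beam-width check below is only the weakest invariant a valid config should satisfy.
TEST_CASE("executor_config sketch", "[backend_workspace_t][!mayfail]")
{
    // Reuse the temporary-folder setup from the parallel_config tests
    const auto tmp_p = std::filesystem::temp_directory_path();
    const auto config_p = tmp_p / "config.json";
    const auto generation_config_p = tmp_p / "generation_config.json";

    std::ofstream o_config(config_p);
    o_config << R"({"pretrained_config": {"mapping": {"world_size": 1}}})"_json;
    o_config.close();

    std::ofstream o_generation_config(generation_config_p);
    o_generation_config << R"({"eos_token_id": []})"_json;
    o_generation_config.close();

    const auto workspace = backend_workspace_t(tmp_p.generic_string(), tmp_p.generic_string());
    const auto executor = workspace.executor_config();  // assumed accessor

    // Any usable executor configuration should request at least one beam
    REQUIRE(executor.getMaxBeamWidth() >= 1);

    std::filesystem::remove(config_p);
    std::filesystem::remove(generation_config_p);
}
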
TEST_CASE("sampling_params_t to tle::SamplingConfig", "[backend_t]")
{
    const sampling_params_t params = {40, 0.95, 0.9, 1.0, 0.6, 2014};
    const auto config = static_cast<tle::SamplingConfig>(params);

    REQUIRE(config.getTopK().has_value());
    REQUIRE(config.getTopK().value() == params.top_k);

    REQUIRE(config.getSeed().has_value());
    REQUIRE(config.getSeed().value() == params.seed);

    REQUIRE(config.getTopP().has_value());
    REQUIRE_THAT(*config.getTopP(), Catch::Matchers::WithinAbs(params.top_p, 1e-6f));

    REQUIRE(config.getRepetitionPenalty().has_value());
    REQUIRE_THAT(*config.getRepetitionPenalty(), Catch::Matchers::WithinAbs(params.repetition_penalty, 1e-6f));

    REQUIRE(config.getFrequencyPenalty().has_value());
    REQUIRE_THAT(*config.getFrequencyPenalty(), Catch::Matchers::WithinAbs(params.frequency_penalty, 1e-6f));

    REQUIRE(config.getTemperature().has_value());
    REQUIRE_THAT(*config.getTemperature(), Catch::Matchers::WithinAbs(params.temperature, 1e-6f));
}