chore(trtllm): validate there are enough GPUs on the system for the desired model

Morgan Funtowicz 2024-10-21 23:40:38 +02:00
parent 848b8ad554
commit a6ac2741a3

@@ -88,7 +88,16 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
         config(json::parse(std::ifstream(enginesFolder / "config.json"))),
         executor(enginesFolder, tensorrt_llm::executor::ModelType::kDECODER_ONLY,
                  GetExecutorConfig(config, executorWorker.string())) {
-    SPDLOG_INFO(FMT_STRING("Engine (version={})"), config["/version"_json_pointer].get_ref<const std::string &>());
+    SPDLOG_INFO(FMT_STRING("Engine (version={})"), config["/version"_json_pointer].get<std::string_view>());
+
+    // Ensure we have enough GPUs on the system
+    const auto worldSize = config["/pretrained_config/mapping/world_size"_json_pointer].get<size_t>();
+    const auto numGpus = huggingface::hardware::cuda::GetNumDevices().value_or(0);
+    if (numGpus < worldSize) {
+        SPDLOG_CRITICAL(FMT_NOT_ENOUGH_GPUS, numGpus, worldSize);
+        // todo : raise exception to catch on rust side
+    }
+
     // Cache variables
     maxNumTokens = config["/build_config/max_num_tokens"_json_pointer].get<uint32_t>();
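
For reference, huggingface::hardware::cuda::GetNumDevices() is not shown in this diff; the value_or(0) at the call site suggests it returns std::optional<size_t>. Below is a minimal sketch of such a helper, assuming it wraps cudaGetDeviceCount from the CUDA runtime API — the body is an assumption for illustration, not the repository's actual implementation.

#include <cstddef>
#include <optional>
#include <cuda_runtime_api.h>

namespace huggingface::hardware::cuda {
    // Sketch (assumed implementation): report the number of CUDA-capable
    // devices, or std::nullopt if the runtime cannot be queried at all
    // (e.g. no driver installed).
    std::optional<std::size_t> GetNumDevices() {
        int numDevices = 0;
        if (cudaGetDeviceCount(&numDevices) != cudaSuccess)
            return std::nullopt;
        return static_cast<std::size_t>(numDevices);
    }
}

Returning std::nullopt on any runtime error lets the call site's value_or(0) treat an unprobeable system as having zero usable GPUs, which conservatively trips the world_size check above.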