From 9bb630971286451d8b87e05e1d7f7cc40b269965 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Sat, 30 Nov 2024 23:04:57 +0100 Subject: [PATCH] feat(backend): added some logging --- backends/trtllm/csrc/backend.cpp | 6 +++--- backends/trtllm/csrc/backend.hpp | 19 ++++++++++++++++++- backends/trtllm/csrc/ffi.hpp | 13 +++++++++---- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/backends/trtllm/csrc/backend.cpp b/backends/trtllm/csrc/backend.cpp index 2c681dd1..2b2e0239 100644 --- a/backends/trtllm/csrc/backend.cpp +++ b/backends/trtllm/csrc/backend.cpp @@ -1,5 +1,4 @@ #include -#include #include "backend.hpp" #include @@ -12,7 +11,7 @@ namespace huggingface::tgi::backends::trtllm { std::expected backend_t::submit(std::span token_ids, generation_params_t generation_params, sampling_params_t sampling_params) noexcept { - SPDLOG_DEBUG(FMT_STRING("Submitting {:d} tokens to the executor for scheduling"), token_ids.size()); + SPDLOG_DEBUG("Submitting {:d} tokens to the executor for scheduling ({}, {})", token_ids.size(), generation_params, sampling_params); return executor_.enqueueRequest(tle::Request { {token_ids.begin(), token_ids.end()}, // Making actual copy of the tokens static_cast(generation_params.max_new_tokens), @@ -28,11 +27,12 @@ namespace huggingface::tgi::backends::trtllm { } std::vector backend_t::pull_tokens() noexcept { + SPDLOG_TRACE(FMT_STRING("Pulling out tokens ({:d} available)"), num_tokens_ready()); return executor_.awaitResponses(); } void backend_t::cancel(request_id_t request_id) noexcept { - SPDLOG_INFO(FMT_STRING("Cancelling request: {:d}"), request_id); + SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id); executor_.cancelRequest(request_id); } } \ No newline at end of file diff --git a/backends/trtllm/csrc/backend.hpp b/backends/trtllm/csrc/backend.hpp index 9627f0ec..d17a344c 100644 --- a/backends/trtllm/csrc/backend.hpp +++ b/backends/trtllm/csrc/backend.hpp @@ -5,6 +5,7 @@ #include #include +#include #include namespace huggingface::tgi::backends::trtllm { @@ -97,4 +98,20 @@ namespace huggingface::tgi::backends::trtllm { */ void cancel(request_id_t) noexcept; }; -} \ No newline at end of file +} + +template <> struct fmt::formatter: formatter { + auto format(huggingface::tgi::backends::trtllm::generation_params_t c, format_context& ctx) const -> format_context::iterator { + return format_to(ctx.out(), "generation_params_t{{ max_new_tokens={:d} }}", c.max_new_tokens); + } +}; + +template <> struct fmt::formatter: formatter { + auto format(huggingface::tgi::backends::trtllm::sampling_params_t c, format_context& ctx) const -> format_context::iterator { + return format_to( + ctx.out(), + "sampling_params_t{{ top_k={:d}, top_p={:.3f}, repetition_penalty={:.3f}, frequency_penalty={:.3f}, length_penalty={:.3f}, temperature={:.3f}, seed={:d} }}", + c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.length_penalty, c.temperature, c.seed + ); + } +}; \ No newline at end of file diff --git a/backends/trtllm/csrc/ffi.hpp b/backends/trtllm/csrc/ffi.hpp index d72b26db..ff68deb3 100644 --- a/backends/trtllm/csrc/ffi.hpp +++ b/backends/trtllm/csrc/ffi.hpp @@ -10,10 +10,12 @@ namespace rust::behavior { } } +#include +#include +#include #include namespace huggingface::tgi::backends::trtllm { - class tensorrt_llm_backend_t { private: backend_t inner_; @@ -35,9 +37,12 @@ namespace huggingface::tgi::backends::trtllm { float_t frequency_penalty, uint64_t seed ) { + // This is enabled only if using add_compile_definitions(SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE) + SPDLOG_TRACE(FMT_STRING("[FFI] Submitting {:d} prompt tokens to the executor")); + // Submit the request to the executor and get back a potential request_id used to track request status const auto maybe_request_id = inner_.submit( - {tokens_.data(), tokens.size()}, + {tokens.data(), tokens.size()}, {max_new_tokens}, {top_k, top_p, repetition_penalty, frequency_penalty, temperature, seed} ); @@ -46,12 +51,12 @@ namespace huggingface::tgi::backends::trtllm { if(maybe_request_id.has_value()) [[likely]] { return *maybe_request_id; } else { - + SPDLOG_WARN("[FFI] Failed to submit request to the executor"); } } void cancel(request_id_t requestId) noexcept { - SPDLOG + SPDLOG_DEBUG(FMT_STRING("[FFI] cancelling request {:d}"), requestId); inner_.cancel(requestId); } };