Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-08-01 12:50:17 +00:00)

feat(backend): added some logging

parent 6d3565759a
commit 9bb6309712
@@ -1,5 +1,4 @@
 #include <ranges>
-#include <utility>
 #include "backend.hpp"
 
 #include <spdlog/spdlog.h>
@@ -12,7 +11,7 @@ namespace huggingface::tgi::backends::trtllm {
 
     std::expected<request_id_t, backend_exception_t>
     backend_t::submit(std::span<tle::TokenIdType> token_ids, generation_params_t generation_params, sampling_params_t sampling_params) noexcept {
-        SPDLOG_DEBUG(FMT_STRING("Submitting {:d} tokens to the executor for scheduling"), token_ids.size());
+        SPDLOG_DEBUG("Submitting {:d} tokens to the executor for scheduling ({}, {})", token_ids.size(), generation_params, sampling_params);
         return executor_.enqueueRequest(tle::Request {
             {token_ids.begin(), token_ids.end()}, // Making actual copy of the tokens
             static_cast<tle::SizeType32>(generation_params.max_new_tokens),
@@ -28,11 +27,12 @@ namespace huggingface::tgi::backends::trtllm {
     }
 
     std::vector<tle::Response> backend_t::pull_tokens() noexcept {
+        SPDLOG_TRACE(FMT_STRING("Pulling out tokens ({:d} available)"), num_tokens_ready());
         return executor_.awaitResponses();
     }
 
     void backend_t::cancel(request_id_t request_id) noexcept {
-        SPDLOG_INFO(FMT_STRING("Cancelling request: {:d}"), request_id);
+        SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id);
         executor_.cancelRequest(request_id);
     }
 }
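The new pull_tokens() trace line and the cancel() message (demoted from INFO to TRACE) only produce output when trace logging is compiled in. A minimal sketch, not part of the commit, of spdlog's compile-time filtering that these macros rely on; the SPDLOG_ACTIVE_LEVEL define is placed inline here purely for illustration, whereas a build would normally pass it as a compile definition:

// Sketch only: SPDLOG_TRACE/SPDLOG_DEBUG expand to real log calls only when
// SPDLOG_ACTIVE_LEVEL is at or below their level; otherwise they compile away
// (the default level is SPDLOG_LEVEL_INFO).
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_TRACE   // illustrative; usually set by the build system
#include <spdlog/spdlog.h>

int main() {
    spdlog::set_level(spdlog::level::trace);      // the runtime filter must allow the level too
    SPDLOG_TRACE("Pulling out tokens ({:d} available)", 0);
    SPDLOG_DEBUG("Submitting {:d} tokens to the executor for scheduling", 128);
    return 0;
}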
@@ -5,6 +5,7 @@
 #include <list>
 #include <span>
 
+#include <spdlog/fmt/fmt.h>
 #include <tensorrt_llm/executor/executor.h>
 
 namespace huggingface::tgi::backends::trtllm {
@@ -98,3 +99,19 @@ namespace huggingface::tgi::backends::trtllm {
         void cancel(request_id_t) noexcept;
     };
 }
+
+template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::generation_params_t>: formatter<string_view> {
+    auto format(huggingface::tgi::backends::trtllm::generation_params_t c, format_context& ctx) const -> format_context::iterator {
+        return format_to(ctx.out(), "generation_params_t{{ max_new_tokens={:d} }}", c.max_new_tokens);
+    }
+};
+
+template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_params_t>: formatter<string_view> {
+    auto format(huggingface::tgi::backends::trtllm::sampling_params_t c, format_context& ctx) const -> format_context::iterator {
+        return format_to(
+            ctx.out(),
+            "sampling_params_t{{ top_k={:d}, top_p={:.3f}, repetition_penalty={:.3f}, frequency_penalty={:.3f}, length_penalty={:.3f}, temperature={:.3f}, seed={:d} }}",
+            c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.length_penalty, c.temperature, c.seed
+        );
+    }
+};
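These formatter specializations are what let the updated SPDLOG_DEBUG call in the source file above print generation_params and sampling_params through plain {} placeholders. A standalone sketch of the same pattern, using a hypothetical toy struct rather than the actual TGI types:

#include <cstdint>
#include <fmt/format.h>

// Toy type standing in for generation_params_t; not the real TGI struct.
struct toy_params_t { uint32_t max_new_tokens; };

// Inheriting from formatter<string_view> reuses its parse(), the same idiom the diff follows.
template <> struct fmt::formatter<toy_params_t>: formatter<string_view> {
    auto format(toy_params_t p, format_context& ctx) const -> format_context::iterator {
        // "{{" and "}}" are escaped literal braces in fmt format strings
        return format_to(ctx.out(), "toy_params_t{{ max_new_tokens={:d} }}", p.max_new_tokens);
    }
};

int main() {
    fmt::print("{}\n", toy_params_t{128});   // prints: toy_params_t{ max_new_tokens=128 }
    return 0;
}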
@@ -10,10 +10,12 @@ namespace rust::behavior {
     }
 }
 
+#include <spdlog/spdlog.h>
+#include <spdlog/pattern_formatter.h>
+#include <spdlog/fmt/fmt.h>
 #include <backend.hpp>
 
 namespace huggingface::tgi::backends::trtllm {
 
     class tensorrt_llm_backend_t {
     private:
         backend_t inner_;
@@ -35,9 +37,12 @@ namespace huggingface::tgi::backends::trtllm {
                 float_t frequency_penalty,
                 uint64_t seed
         ) {
+            // This is enabled only if using add_compile_definitions(SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
+            SPDLOG_TRACE(FMT_STRING("[FFI] Submitting {:d} prompt tokens to the executor"));
+
             // Submit the request to the executor and get back a potential request_id used to track request status
             const auto maybe_request_id = inner_.submit(
-                {tokens_.data(), tokens.size()},
+                {tokens.data(), tokens.size()},
                 {max_new_tokens},
                 {top_k, top_p, repetition_penalty, frequency_penalty, temperature, seed}
             );
@@ -46,12 +51,12 @@ namespace huggingface::tgi::backends::trtllm {
             if(maybe_request_id.has_value()) [[likely]] {
                 return *maybe_request_id;
             } else {
+                SPDLOG_WARN("[FFI] Failed to submit request to the executor");
             }
         }
 
         void cancel(request_id_t requestId) noexcept {
-            SPDLOG
+            SPDLOG_DEBUG(FMT_STRING("[FFI] cancelling request {:d}"), requestId);
             inner_.cancel(requestId);
         }
     };
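The FFI wrapper now pulls in spdlog/pattern_formatter.h next to the logging macros. A rough sketch, not the actual TGI startup code, of the runtime configuration that makes trace/debug records visible; it uses spdlog's runtime logging functions rather than the compile-time macros from the diff:

#include <cstdint>
#include <spdlog/spdlog.h>

int main() {
    // Runtime side: the logger level and the output pattern decide what gets
    // printed for records that survived compile-time filtering.
    spdlog::set_level(spdlog::level::trace);
    spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%l] %v");

    uint64_t request_id = 1;                           // illustrative value
    spdlog::debug("[FFI] cancelling request {:d}", request_id);
    return 0;
}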