feat(backend): added some logging

This commit is contained in:
Morgan Funtowicz 2024-11-30 23:04:57 +01:00
parent 6d3565759a
commit 9bb6309712
3 changed files with 30 additions and 8 deletions

View File

@ -1,5 +1,4 @@
#include <ranges>
#include <utility>
#include "backend.hpp"
#include <spdlog/spdlog.h>
@ -12,7 +11,7 @@ namespace huggingface::tgi::backends::trtllm {
std::expected<request_id_t, backend_exception_t>
backend_t::submit(std::span<tle::TokenIdType> token_ids, generation_params_t generation_params, sampling_params_t sampling_params) noexcept {
SPDLOG_DEBUG(FMT_STRING("Submitting {:d} tokens to the executor for scheduling"), token_ids.size());
SPDLOG_DEBUG("Submitting {:d} tokens to the executor for scheduling ({}, {})", token_ids.size(), generation_params, sampling_params);
return executor_.enqueueRequest(tle::Request {
{token_ids.begin(), token_ids.end()}, // Making actual copy of the tokens
static_cast<tle::SizeType32>(generation_params.max_new_tokens),
@ -28,11 +27,12 @@ namespace huggingface::tgi::backends::trtllm {
}
/**
 * Fetch the responses produced by the underlying executor.
 *
 * Emits a trace-level log line reporting `num_tokens_ready()` and then hands
 * back whatever `executor_.awaitResponses()` returns.
 *
 * NOTE(review): `awaitResponses()` presumably blocks until at least one
 * response is available -- confirm against the executor documentation.
 *
 * @return The responses returned by the executor
 */
std::vector<tle::Response> backend_t::pull_tokens() noexcept {
    SPDLOG_TRACE(FMT_STRING("Pulling out tokens ({:d} available)"), num_tokens_ready());

    // Name the result before returning it so it is easy to inspect in a debugger
    std::vector<tle::Response> responses = executor_.awaitResponses();
    return responses;
}
/**
 * Ask the underlying executor to cancel a previously submitted request.
 *
 * The cancellation is logged once, at trace level, before being forwarded to
 * `executor_.cancelRequest`.
 *
 * @param request_id Identifier of the request to cancel
 */
void backend_t::cancel(request_id_t request_id) noexcept {
    // Log a single time: the same message used to be emitted at both INFO and TRACE level
    SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id);
    executor_.cancelRequest(request_id);
}
}

View File

@ -5,6 +5,7 @@
#include <list>
#include <span>
#include <spdlog/fmt/fmt.h>
#include <tensorrt_llm/executor/executor.h>
namespace huggingface::tgi::backends::trtllm {
@ -97,4 +98,20 @@ namespace huggingface::tgi::backends::trtllm {
*/
void cancel(request_id_t) noexcept;
};
}
}
/**
 * fmt formatter specialization used to render `generation_params_t` in log messages.
 *
 * Produces text of the form `generation_params_t{ max_new_tokens=<n> }`.
 */
template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::generation_params_t>: formatter<string_view> {
    /**
     * Write the textual representation of the parameters to the format context.
     *
     * @param c Generation parameters to render
     * @param ctx Format context receiving the output
     * @return Iterator past the last character written
     */
    auto format(const huggingface::tgi::backends::trtllm::generation_params_t &c, format_context& ctx) const -> format_context::iterator {
        // Qualify the call explicitly: an unqualified `format_to` can be picked up through ADL
        // and clash with `std::format_to` once <format> is visible in the translation unit.
        return fmt::format_to(ctx.out(), "generation_params_t{{ max_new_tokens={:d} }}", c.max_new_tokens);
    }
};
/**
 * fmt formatter specialization used to render `sampling_params_t` in log messages.
 *
 * Integer fields (`top_k`, `seed`) are printed with `{:d}`, every other field
 * with three decimals (`{:.3f}`).
 *
 * NOTE(review): `length_penalty` is printed here, but the FFI caller visible in this
 * change builds the struct from six values (top_k, top_p, repetition_penalty,
 * frequency_penalty, temperature, seed) -- confirm the field list matches the struct.
 */
template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_params_t>: formatter<string_view> {
    /**
     * Write the textual representation of the parameters to the format context.
     *
     * @param c Sampling parameters to render
     * @param ctx Format context receiving the output
     * @return Iterator past the last character written
     */
    auto format(const huggingface::tgi::backends::trtllm::sampling_params_t &c, format_context& ctx) const -> format_context::iterator {
        // Qualify the call explicitly: an unqualified `format_to` can be picked up through ADL
        // and clash with `std::format_to` once <format> is visible in the translation unit.
        return fmt::format_to(
            ctx.out(),
            "sampling_params_t{{ top_k={:d}, top_p={:.3f}, repetition_penalty={:.3f}, frequency_penalty={:.3f}, length_penalty={:.3f}, temperature={:.3f}, seed={:d} }}",
            c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.length_penalty, c.temperature, c.seed
        );
    }
};

View File

@ -10,10 +10,12 @@ namespace rust::behavior {
}
}
#include <spdlog/spdlog.h>
#include <spdlog/pattern_formatter.h>
#include <spdlog/fmt/fmt.h>
#include <backend.hpp>
namespace huggingface::tgi::backends::trtllm {
class tensorrt_llm_backend_t {
private:
backend_t inner_;
@ -35,9 +37,12 @@ namespace huggingface::tgi::backends::trtllm {
float_t frequency_penalty,
uint64_t seed
) {
// This is enabled only if using add_compile_definitions(SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
SPDLOG_TRACE(FMT_STRING("[FFI] Submitting {:d} prompt tokens to the executor"));
// Submit the request to the executor and get back a potential request_id used to track request status
const auto maybe_request_id = inner_.submit(
{tokens_.data(), tokens.size()},
{tokens.data(), tokens.size()},
{max_new_tokens},
{top_k, top_p, repetition_penalty, frequency_penalty, temperature, seed}
);
@ -46,12 +51,12 @@ namespace huggingface::tgi::backends::trtllm {
if(maybe_request_id.has_value()) [[likely]] {
return *maybe_request_id;
} else {
SPDLOG_WARN("[FFI] Failed to submit request to the executor");
}
}
/**
 * FFI entry point cancelling a request previously submitted to the backend.
 *
 * Logs the request identifier at debug level and forwards the call to the
 * wrapped `backend_t` instance (`inner_`).
 *
 * @param requestId Identifier of the request to cancel
 */
void cancel(request_id_t requestId) noexcept {
    // The stray bare `SPDLOG` token that preceded this statement was removed: it did not compile
    SPDLOG_DEBUG(FMT_STRING("[FFI] cancelling request {:d}"), requestId);
    inner_.cancel(requestId);
}
};