mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-07-30 20:00:16 +00:00
feat(backend): added some logging
This commit is contained in:
parent
6d3565759a
commit
9bb6309712
@ -1,5 +1,4 @@
|
||||
#include <ranges>
|
||||
#include <utility>
|
||||
#include "backend.hpp"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
@ -12,7 +11,7 @@ namespace huggingface::tgi::backends::trtllm {
|
||||
|
||||
std::expected<request_id_t, backend_exception_t>
|
||||
backend_t::submit(std::span<tle::TokenIdType> token_ids, generation_params_t generation_params, sampling_params_t sampling_params) noexcept {
|
||||
SPDLOG_DEBUG(FMT_STRING("Submitting {:d} tokens to the executor for scheduling"), token_ids.size());
|
||||
SPDLOG_DEBUG("Submitting {:d} tokens to the executor for scheduling ({}, {})", token_ids.size(), generation_params, sampling_params);
|
||||
return executor_.enqueueRequest(tle::Request {
|
||||
{token_ids.begin(), token_ids.end()}, // Making actual copy of the tokens
|
||||
static_cast<tle::SizeType32>(generation_params.max_new_tokens),
|
||||
@ -28,11 +27,12 @@ namespace huggingface::tgi::backends::trtllm {
|
||||
}
|
||||
|
||||
std::vector<tle::Response> backend_t::pull_tokens() noexcept {
|
||||
SPDLOG_TRACE(FMT_STRING("Pulling out tokens ({:d} available)"), num_tokens_ready());
|
||||
return executor_.awaitResponses();
|
||||
}
|
||||
|
||||
/**
 * Ask the executor to cancel the request identified by `request_id`.
 *
 * @param request_id Identifier of the request to cancel
 */
void backend_t::cancel(request_id_t request_id) noexcept {
    // The message is emitted once, at trace level
    SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id);
    executor_.cancelRequest(request_id);
}
|
||||
}
|
@ -5,6 +5,7 @@
|
||||
#include <list>
|
||||
#include <span>
|
||||
|
||||
#include <spdlog/fmt/fmt.h>
|
||||
#include <tensorrt_llm/executor/executor.h>
|
||||
|
||||
namespace huggingface::tgi::backends::trtllm {
|
||||
@ -97,4 +98,20 @@ namespace huggingface::tgi::backends::trtllm {
|
||||
*/
|
||||
void cancel(request_id_t) noexcept;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * fmt formatter rendering a `generation_params_t` as
 * `generation_params_t{ max_new_tokens=<n> }`.
 *
 * Format-spec parsing is inherited from `formatter<string_view>`.
 */
template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::generation_params_t>: formatter<string_view> {
    /**
     * Write the textual representation of `c` to the context's output iterator.
     *
     * @param c Generation parameters to render
     * @param ctx Formatting context providing the output iterator
     * @return Iterator past the last character written
     */
    auto format(const huggingface::tgi::backends::trtllm::generation_params_t& c, format_context& ctx) const -> format_context::iterator {
        // Explicitly qualified so the call cannot be confused with std::format_to found through ADL
        return fmt::format_to(ctx.out(), "generation_params_t{{ max_new_tokens={:d} }}", c.max_new_tokens);
    }
};
|
||||
|
||||
/**
 * fmt formatter rendering a `sampling_params_t` as a single
 * `sampling_params_t{ ... }` record listing top_k, top_p, repetition_penalty,
 * frequency_penalty, length_penalty, temperature and seed.
 *
 * Floating point fields are printed with three decimals (`{:.3f}`);
 * format-spec parsing is inherited from `formatter<string_view>`.
 */
template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_params_t>: formatter<string_view> {
    /**
     * Write the textual representation of `c` to the context's output iterator.
     *
     * @param c Sampling parameters to render
     * @param ctx Formatting context providing the output iterator
     * @return Iterator past the last character written
     */
    auto format(const huggingface::tgi::backends::trtllm::sampling_params_t& c, format_context& ctx) const -> format_context::iterator {
        // Explicitly qualified so the call cannot be confused with std::format_to found through ADL
        return fmt::format_to(
            ctx.out(),
            "sampling_params_t{{ top_k={:d}, top_p={:.3f}, repetition_penalty={:.3f}, frequency_penalty={:.3f}, length_penalty={:.3f}, temperature={:.3f}, seed={:d} }}",
            c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.length_penalty, c.temperature, c.seed
        );
    }
};
|
@ -10,10 +10,12 @@ namespace rust::behavior {
|
||||
}
|
||||
}
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
#include <spdlog/pattern_formatter.h>
|
||||
#include <spdlog/fmt/fmt.h>
|
||||
#include <backend.hpp>
|
||||
|
||||
namespace huggingface::tgi::backends::trtllm {
|
||||
|
||||
class tensorrt_llm_backend_t {
|
||||
private:
|
||||
backend_t inner_;
|
||||
@ -35,9 +37,12 @@ namespace huggingface::tgi::backends::trtllm {
|
||||
float_t frequency_penalty,
|
||||
uint64_t seed
|
||||
) {
|
||||
// This is enabled only if using add_compile_definitions(SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
|
||||
SPDLOG_TRACE(FMT_STRING("[FFI] Submitting {:d} prompt tokens to the executor"));
|
||||
|
||||
// Submit the request to the executor and get back a potential request_id used to track request status
|
||||
const auto maybe_request_id = inner_.submit(
|
||||
{tokens_.data(), tokens.size()},
|
||||
{tokens.data(), tokens.size()},
|
||||
{max_new_tokens},
|
||||
{top_k, top_p, repetition_penalty, frequency_penalty, temperature, seed}
|
||||
);
|
||||
@ -46,12 +51,12 @@ namespace huggingface::tgi::backends::trtllm {
|
||||
if(maybe_request_id.has_value()) [[likely]] {
|
||||
return *maybe_request_id;
|
||||
} else {
|
||||
|
||||
SPDLOG_WARN("[FFI] Failed to submit request to the executor");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Forward a cancellation for `requestId` to the wrapped backend.
 *
 * @param requestId Identifier of the request to cancel
 */
void cancel(request_id_t requestId) noexcept {
    SPDLOG_DEBUG(FMT_STRING("[FFI] cancelling request {:d}"), requestId);
    inner_.cancel(requestId);
}
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user