Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-08-01 12:50:17 +00:00)

feat(backend): added some logging

parent 6d3565759a
commit 9bb6309712
@@ -1,5 +1,4 @@
 #include <ranges>
-#include <utility>
 #include "backend.hpp"
 
 #include <spdlog/spdlog.h>
@@ -12,7 +11,7 @@ namespace huggingface::tgi::backends::trtllm {
 
     std::expected<request_id_t, backend_exception_t>
     backend_t::submit(std::span<tle::TokenIdType> token_ids, generation_params_t generation_params, sampling_params_t sampling_params) noexcept {
-        SPDLOG_DEBUG(FMT_STRING("Submitting {:d} tokens to the executor for scheduling"), token_ids.size());
+        SPDLOG_DEBUG("Submitting {:d} tokens to the executor for scheduling ({}, {})", token_ids.size(), generation_params, sampling_params);
         return executor_.enqueueRequest(tle::Request {
             {token_ids.begin(), token_ids.end()}, // Making actual copy of the tokens
             static_cast<tle::SizeType32>(generation_params.max_new_tokens),
@@ -28,11 +27,12 @@ namespace huggingface::tgi::backends::trtllm {
     }
 
     std::vector<tle::Response> backend_t::pull_tokens() noexcept {
+        SPDLOG_TRACE(FMT_STRING("Pulling out tokens ({:d} available)"), num_tokens_ready());
         return executor_.awaitResponses();
     }
 
     void backend_t::cancel(request_id_t request_id) noexcept {
-        SPDLOG_INFO(FMT_STRING("Cancelling request: {:d}"), request_id);
+        SPDLOG_TRACE(FMT_STRING("Cancelling request: {:d}"), request_id);
         executor_.cancelRequest(request_id);
     }
 }
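The new pull_tokens() trace line and the cancel() message (demoted from INFO to TRACE) only produce output when trace logging is compiled in. A minimal sketch, not part of the commit, of spdlog's compile-time filtering that these macros rely on; the SPDLOG_ACTIVE_LEVEL define is placed inline here purely for illustration, whereas a build would normally pass it as a compile definition:

// Sketch only: SPDLOG_TRACE/SPDLOG_DEBUG expand to real log calls only when
// SPDLOG_ACTIVE_LEVEL is at or below their level; otherwise they compile away
// (the default level is SPDLOG_LEVEL_INFO).
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_TRACE   // illustrative; usually set by the build system
#include <spdlog/spdlog.h>

int main() {
    spdlog::set_level(spdlog::level::trace);      // the runtime filter must allow the level too
    SPDLOG_TRACE("Pulling out tokens ({:d} available)", 0);
    SPDLOG_DEBUG("Submitting {:d} tokens to the executor for scheduling", 128);
    return 0;
}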
@@ -5,6 +5,7 @@
 #include <list>
 #include <span>
 
+#include <spdlog/fmt/fmt.h>
 #include <tensorrt_llm/executor/executor.h>
 
 namespace huggingface::tgi::backends::trtllm {
@@ -98,3 +99,19 @@ namespace huggingface::tgi::backends::trtllm {
         void cancel(request_id_t) noexcept;
     };
 }
+
+template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::generation_params_t>: formatter<string_view> {
+    auto format(huggingface::tgi::backends::trtllm::generation_params_t c, format_context& ctx) const -> format_context::iterator {
+        return format_to(ctx.out(), "generation_params_t{{ max_new_tokens={:d} }}", c.max_new_tokens);
+    }
+};
+
+template <> struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_params_t>: formatter<string_view> {
+    auto format(huggingface::tgi::backends::trtllm::sampling_params_t c, format_context& ctx) const -> format_context::iterator {
+        return format_to(
+            ctx.out(),
+            "sampling_params_t{{ top_k={:d}, top_p={:.3f}, repetition_penalty={:.3f}, frequency_penalty={:.3f}, length_penalty={:.3f}, temperature={:.3f}, seed={:d} }}",
+            c.top_k, c.top_p, c.repetition_penalty, c.frequency_penalty, c.length_penalty, c.temperature, c.seed
+        );
+    }
+};
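These formatter specializations are what let the updated SPDLOG_DEBUG call in the source file above print generation_params and sampling_params through plain {} placeholders. A standalone sketch of the same pattern, using a hypothetical toy struct rather than the actual TGI types:

#include <cstdint>
#include <fmt/format.h>

// Toy type standing in for generation_params_t; not the real TGI struct.
struct toy_params_t { uint32_t max_new_tokens; };

// Inheriting from formatter<string_view> reuses its parse(), the same idiom the diff follows.
template <> struct fmt::formatter<toy_params_t>: formatter<string_view> {
    auto format(toy_params_t p, format_context& ctx) const -> format_context::iterator {
        // "{{" and "}}" are escaped literal braces in fmt format strings
        return format_to(ctx.out(), "toy_params_t{{ max_new_tokens={:d} }}", p.max_new_tokens);
    }
};

int main() {
    fmt::print("{}\n", toy_params_t{128});   // prints: toy_params_t{ max_new_tokens=128 }
    return 0;
}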
@@ -10,10 +10,12 @@ namespace rust::behavior {
     }
 }
 
+#include <spdlog/spdlog.h>
+#include <spdlog/pattern_formatter.h>
+#include <spdlog/fmt/fmt.h>
 #include <backend.hpp>
 
 namespace huggingface::tgi::backends::trtllm {
 
     class tensorrt_llm_backend_t {
     private:
         backend_t inner_;
@@ -35,9 +37,12 @@ namespace huggingface::tgi::backends::trtllm {
                 float_t frequency_penalty,
                 uint64_t seed
         ) {
+            // This is enabled only if using add_compile_definitions(SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
+            SPDLOG_TRACE(FMT_STRING("[FFI] Submitting {:d} prompt tokens to the executor"));
+
             // Submit the request to the executor and get back a potential request_id used to track request status
             const auto maybe_request_id = inner_.submit(
-                {tokens_.data(), tokens.size()},
+                {tokens.data(), tokens.size()},
                 {max_new_tokens},
                 {top_k, top_p, repetition_penalty, frequency_penalty, temperature, seed}
             );
@@ -46,12 +51,12 @@ namespace huggingface::tgi::backends::trtllm {
             if(maybe_request_id.has_value()) [[likely]] {
                 return *maybe_request_id;
             } else {
+                SPDLOG_WARN("[FFI] Failed to submit request to the executor");
             }
         }
 
         void cancel(request_id_t requestId) noexcept {
-            SPDLOG
+            SPDLOG_DEBUG(FMT_STRING("[FFI] cancelling request {:d}"), requestId);
             inner_.cancel(requestId);
         }
     };
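The FFI wrapper now pulls in spdlog/pattern_formatter.h next to the logging macros. A rough sketch, not the actual TGI startup code, of the runtime configuration that makes trace/debug records visible; it uses spdlog's runtime logging functions rather than the compile-time macros from the diff:

#include <cstdint>
#include <spdlog/spdlog.h>

int main() {
    // Runtime side: the logger level and the output pattern decide what gets
    // printed for records that survived compile-time filtering.
    spdlog::set_level(spdlog::level::trace);
    spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%l] %v");

    uint64_t request_id = 1;                           // illustrative value
    spdlog::debug("[FFI] cancelling request {:d}", request_id);
    return 0;
}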