mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
add logs
This commit is contained in:
parent df419a21e0
commit a8e6a49946
@@ -427,6 +427,7 @@ async fn decode(
             generation_health.store(true, Ordering::Relaxed);
             // Send generated tokens and filter stopped entries
             filter_send_generations(generations, entries);
+            tracing::debug!("filter batch");

             // Filter next batch and remove requests that were stopped
             let next_batch = filter_batch(client, next_batch, entries).await;
@@ -472,9 +473,11 @@ async fn filter_batch(
         // Next batch is now empty
         // Clear it from the Python shards cache
         // We unwrap here as we need to panic since we cannot recover if this method fails
+        tracing::info!("Call python clear cache");
         client.clear_cache(Some(id)).await.unwrap();
         None
     } else {
+        tracing::info!("Call python filter batch");
         // Filter Python shard cache
         // We unwrap here as we need to panic since we cannot recover if this method fails
         client.filter_batch(id, batch.request_ids).await.unwrap()
@@ -519,6 +522,7 @@ fn send_responses(
 ) -> Result<bool, Box<SendTimeoutError<Result<InferStreamResponse, InferError>>>> {
     // Return directly if the channel is disconnected
     if entry.response_tx.is_disconnected() {
+        tracing::debug!("Disconnected");
         metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
         return Ok(true);
     }
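Aside on the counter beside the new "Disconnected" log: the increment_counter! form used here matches the metrics macro facade circa version 0.21, and such calls are no-ops until a recorder is installed. A minimal standalone sketch, assuming metrics = "0.21" in Cargo.toml (an assumption, not taken from this commit):

// Assumed dependency: metrics = "0.21"
fn main() {
    // With no recorder installed (e.g. no metrics-exporter-prometheus),
    // this compiles and runs but records nothing; with one, it increments
    // the counter labeled err="dropped", mirroring the line above.
    metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
}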
@@ -527,6 +531,7 @@ fn send_responses(

     if let Some(prefill_tokens) = generation.prefill_tokens {
         // Send message
+        tracing::debug!("Send prefill");
         entry.response_tx.send_timeout(
             Ok(InferStreamResponse::Prefill(prefill_tokens)),
             Duration::from_millis(10),
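For context on the send_timeout and is_disconnected calls these logs bracket: the SendTimeoutError in the function signature and the disconnect check match the flume channel crate's Sender API, which the router appears to use here (an inference from the types, not stated in the diff). A minimal standalone sketch under that assumption:

// Assumed dependency: flume = "0.10"
use std::time::Duration;

fn main() {
    let (tx, rx) = flume::unbounded::<u32>();

    // Mirrors the guard at the top of send_responses: once every receiver
    // is dropped, is_disconnected() returns true and sending is pointless.
    drop(rx);
    assert!(tx.is_disconnected());

    // send_timeout hands the unsent value back inside the error, which is
    // why send_responses can propagate it as Box<SendTimeoutError<_>>.
    match tx.send_timeout(42, Duration::from_millis(10)) {
        Err(flume::SendTimeoutError::Disconnected(v)) => println!("dropped {v}"),
        Err(flume::SendTimeoutError::Timeout(v)) => println!("timed out {v}"),
        Ok(()) => println!("sent"),
    }
}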
@@ -543,6 +548,7 @@ fn send_responses(

     // generation.top_tokens

+    tracing::debug!("Top tokens");
     let mut top_tokens = Vec::new();
     if let Some(top_tokens_) = generation.top_tokens {
         top_tokens.extend(
@@ -565,6 +571,7 @@ fn send_responses(
         // Generation has ended
         stopped = true;
         // Send message
+        tracing::debug!("send final");
         entry.response_tx.send_timeout(
             Ok(InferStreamResponse::End {
                 token,
@@ -576,6 +583,7 @@ fn send_responses(
             Duration::from_millis(10),
         )?;
     } else {
+        tracing::debug!("send intermediate");
         // Send message
         entry.response_tx.send_timeout(
             Ok(InferStreamResponse::Intermediate { token, top_tokens }),
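The commit only adds call sites; whether the messages appear depends on the subscriber the binary installs at startup. A minimal sketch of a setup that would surface them, assuming tracing-subscriber with its env-filter feature (the router's real initialization may differ):

// Assumed dependencies: tracing = "0.1",
// tracing-subscriber = { version = "0.3", features = ["env-filter"] }
use tracing_subscriber::EnvFilter;

fn main() {
    // Honors RUST_LOG: with RUST_LOG=debug the new tracing::debug! lines
    // ("filter batch", "Send prefill", "send final", ...) are printed;
    // with RUST_LOG=info only the two tracing::info! cache-management
    // logs from filter_batch show up.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .init();

    tracing::info!("Call python clear cache"); // visible at info and below
    tracing::debug!("filter batch"); // visible only when debug is enabled
}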