mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
add logs
This commit is contained in:
parent df419a21e0
commit a8e6a49946
@@ -427,6 +427,7 @@ async fn decode(
             generation_health.store(true, Ordering::Relaxed);
             // Send generated tokens and filter stopped entries
             filter_send_generations(generations, entries);
+            tracing::debug!("filter batch");

             // Filter next batch and remove requests that were stopped
             let next_batch = filter_batch(client, next_batch, entries).await;
@@ -472,9 +473,11 @@ async fn filter_batch(
         // Next batch is now empty
         // Clear it from the Python shards cache
         // We unwrap here as we need to panic since we cannot recover if this method fails
+        tracing::info!("Call python clear cache");
         client.clear_cache(Some(id)).await.unwrap();
         None
     } else {
+        tracing::info!("Call python filter batch");
         // Filter Python shard cache
         // We unwrap here as we need to panic since we cannot recover if this method fails
         client.filter_batch(id, batch.request_ids).await.unwrap()
@@ -519,6 +522,7 @@ fn send_responses(
 ) -> Result<bool, Box<SendTimeoutError<Result<InferStreamResponse, InferError>>>> {
     // Return directly if the channel is disconnected
     if entry.response_tx.is_disconnected() {
+        tracing::debug!("Disconnected");
         metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
         return Ok(true);
     }
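Aside on the counter beside the new "Disconnected" log: the increment_counter! form used here matches the metrics macro facade circa version 0.21, and such calls are no-ops until a recorder is installed. A minimal standalone sketch, assuming metrics = "0.21" in Cargo.toml (an assumption, not taken from this commit):

// Assumed dependency: metrics = "0.21"
fn main() {
    // With no recorder installed (e.g. no metrics-exporter-prometheus),
    // this compiles and runs but records nothing; with one, it increments
    // the counter labeled err="dropped", mirroring the line above.
    metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
}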
@@ -527,6 +531,7 @@ fn send_responses(

     if let Some(prefill_tokens) = generation.prefill_tokens {
         // Send message
+        tracing::debug!("Send prefill");
         entry.response_tx.send_timeout(
             Ok(InferStreamResponse::Prefill(prefill_tokens)),
             Duration::from_millis(10),
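For context on the send_timeout and is_disconnected calls these logs bracket: the SendTimeoutError in the function signature and the disconnect check match the flume channel crate's Sender API, which the router appears to use here (an inference from the types, not stated in the diff). A minimal standalone sketch under that assumption:

// Assumed dependency: flume = "0.10"
use std::time::Duration;

fn main() {
    let (tx, rx) = flume::unbounded::<u32>();

    // Mirrors the guard at the top of send_responses: once every receiver
    // is dropped, is_disconnected() returns true and sending is pointless.
    drop(rx);
    assert!(tx.is_disconnected());

    // send_timeout hands the unsent value back inside the error, which is
    // why send_responses can propagate it as Box<SendTimeoutError<_>>.
    match tx.send_timeout(42, Duration::from_millis(10)) {
        Err(flume::SendTimeoutError::Disconnected(v)) => println!("dropped {v}"),
        Err(flume::SendTimeoutError::Timeout(v)) => println!("timed out {v}"),
        Ok(()) => println!("sent"),
    }
}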
@@ -543,6 +548,7 @@ fn send_responses(

     // generation.top_tokens

+    tracing::debug!("Top tokens");
     let mut top_tokens = Vec::new();
     if let Some(top_tokens_) = generation.top_tokens {
         top_tokens.extend(
@@ -565,6 +571,7 @@ fn send_responses(
         // Generation has ended
         stopped = true;
         // Send message
+        tracing::debug!("send final");
         entry.response_tx.send_timeout(
             Ok(InferStreamResponse::End {
                 token,
@@ -576,6 +583,7 @@ fn send_responses(
             Duration::from_millis(10),
         )?;
     } else {
+        tracing::debug!("send intermediate");
         // Send message
         entry.response_tx.send_timeout(
             Ok(InferStreamResponse::Intermediate { token, top_tokens }),
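The commit only adds call sites; whether the messages appear depends on the subscriber the binary installs at startup. A minimal sketch of a setup that would surface them, assuming tracing-subscriber with its env-filter feature (the router's real initialization may differ):

// Assumed dependencies: tracing = "0.1",
// tracing-subscriber = { version = "0.3", features = ["env-filter"] }
use tracing_subscriber::EnvFilter;

fn main() {
    // Honors RUST_LOG: with RUST_LOG=debug the new tracing::debug! lines
    // ("filter batch", "Send prefill", "send final", ...) are printed;
    // with RUST_LOG=info only the two tracing::info! cache-management
    // logs from filter_batch show up.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .init();

    tracing::info!("Call python clear cache"); // visible at info and below
    tracing::debug!("filter batch"); // visible only when debug is enabled
}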