Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-21 23:12:07 +00:00
(misc) move to latest trtllm

commit 213acc6e34 (parent 507ff66692)
@@ -23,7 +23,7 @@ endif ()
 fetchcontent_declare(
         trtllm
         GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git
-        GIT_TAG a681853d3803ee5893307e812530b5e7004bb6e1
+        GIT_TAG 32ed92e4491baf2d54682a21d247e1948cca996e
         GIT_SHALLOW FALSE
 )
 fetchcontent_makeavailable(trtllm)

@@ -8,18 +8,18 @@ use hashbrown::HashMap;
 use log::warn;
 use tokenizers::{Encoding, Tokenizer};
 use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
-use tokio::task::{JoinHandle, spawn_blocking};
+use tokio::task::{spawn_blocking, JoinHandle};
 use tokio::time::Instant;
 use tokio_stream::wrappers::UnboundedReceiverStream;
-use tracing::{debug, error};
+use tracing::{debug, error, info};

-use text_generation_router::{FinishReason, Token};
-use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse};
 use text_generation_router::infer::InferError::{GenerationError, ValidationError};
-use text_generation_router::validation::{Chunk, ValidGenerateRequest};
+use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse};
 use text_generation_router::validation::ValidationError::{
     EmptyInput, Grammar, TopNTokensDisabled, UnsupportedModality,
 };
+use text_generation_router::validation::{Chunk, ValidGenerateRequest};
+use text_generation_router::{FinishReason, Token};

 use crate::errors::TensorRtLlmBackendError;
 use crate::ffi::{create_tensorrt_llm_backend, GenerationStep, TensorRtLlmBackendImpl};
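Aside from pulling in info from tracing, this hunk only reorders the import list; the imports themselves show the moving parts of the looper: an unbounded tokio mpsc channel carrying results out of a blocking task started with spawn_blocking. A minimal sketch of that channel-plus-blocking-task pairing (the names below are illustrative stand-ins, not the backend's actual types) could look like:

use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};
use tokio::task::spawn_blocking;

// Hypothetical stand-in for one step pulled from the executor.
struct Step(u32);

// Blocking poll loop; the real looper would call into the FFI executor here.
fn poll_executor(sender: UnboundedSender<Step>) {
    for i in 0..3 {
        if sender.send(Step(i)).is_err() {
            break; // receiver dropped, stop polling
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = unbounded_channel();
    // Run the blocking loop on tokio's dedicated blocking thread pool.
    let handle = spawn_blocking(move || poll_executor(tx));
    while let Some(Step(i)) = rx.recv().await {
        println!("received step {i}");
    }
    handle.await.expect("poll task panicked");
}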
@@ -128,6 +128,7 @@ fn executor_status_looper(
             }
         }

+        // info!("Num response ready: {}", backend.num_responses_ready());
         if backend.num_responses_ready() > 0 {
             match backend.pin_mut().pull_tokens() {
                 Ok(responses) => {
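The single added line in this hunk is a commented-out info! trace placed just before the readiness check. The surrounding guard-then-pull shape — only calling pull_tokens once num_responses_ready reports pending work — can be sketched against a hypothetical Executor trait standing in for the FFI backend seen in the diff above:

// Hypothetical stand-in for the FFI executor; the diff above calls
// num_responses_ready() and pull_tokens() on the real backend.
trait Executor {
    fn num_responses_ready(&self) -> usize;
    fn pull_tokens(&mut self) -> Result<Vec<u32>, String>;
}

// One iteration of the polling loop: skip the pull entirely when the
// executor reports nothing ready, so the loop never blocks on an empty queue.
fn poll_once<E: Executor>(backend: &mut E) {
    if backend.num_responses_ready() > 0 {
        match backend.pull_tokens() {
            Ok(responses) => {
                for token in responses {
                    // Forward each decoded token to the streaming side here.
                    let _ = token;
                }
            }
            Err(err) => eprintln!("pull_tokens failed: {err}"),
        }
    }
}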