From 213acc6e34e1877fde156feed6d619fcba3cf1d7 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 25 Sep 2024 10:08:45 +0000 Subject: [PATCH] (misc) move to latest trtllm --- backends/trtllm/cmake/trtllm.cmake | 2 +- backends/trtllm/src/looper.rs | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/backends/trtllm/cmake/trtllm.cmake b/backends/trtllm/cmake/trtllm.cmake index e59ad4cf..e41a436c 100644 --- a/backends/trtllm/cmake/trtllm.cmake +++ b/backends/trtllm/cmake/trtllm.cmake @@ -23,7 +23,7 @@ endif () fetchcontent_declare( trtllm GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git - GIT_TAG a681853d3803ee5893307e812530b5e7004bb6e1 + GIT_TAG 32ed92e4491baf2d54682a21d247e1948cca996e GIT_SHALLOW FALSE ) fetchcontent_makeavailable(trtllm) diff --git a/backends/trtllm/src/looper.rs b/backends/trtllm/src/looper.rs index ba10d9ee..0428a4dc 100644 --- a/backends/trtllm/src/looper.rs +++ b/backends/trtllm/src/looper.rs @@ -8,18 +8,18 @@ use hashbrown::HashMap; use log::warn; use tokenizers::{Encoding, Tokenizer}; use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; -use tokio::task::{JoinHandle, spawn_blocking}; +use tokio::task::{spawn_blocking, JoinHandle}; use tokio::time::Instant; use tokio_stream::wrappers::UnboundedReceiverStream; -use tracing::{debug, error}; +use tracing::{debug, error, info}; -use text_generation_router::{FinishReason, Token}; -use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse}; use text_generation_router::infer::InferError::{GenerationError, ValidationError}; -use text_generation_router::validation::{Chunk, ValidGenerateRequest}; +use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse}; use text_generation_router::validation::ValidationError::{ EmptyInput, Grammar, TopNTokensDisabled, UnsupportedModality, }; +use text_generation_router::validation::{Chunk, ValidGenerateRequest}; +use text_generation_router::{FinishReason, Token}; use crate::errors::TensorRtLlmBackendError; use crate::ffi::{create_tensorrt_llm_backend, GenerationStep, TensorRtLlmBackendImpl}; @@ -128,6 +128,7 @@ fn executor_status_looper( } } + // info!("Num response ready: {}", backend.num_responses_ready()); if backend.num_responses_ready() > 0 { match backend.pin_mut().pull_tokens() { Ok(responses) => {