diff --git a/Cargo.lock b/Cargo.lock index 92367d1e..e470a995 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3896,6 +3896,7 @@ dependencies = [ "cxx", "cxx-build", "log", + "parking_lot", "pkg-config", "text-generation-router", "thiserror", diff --git a/backends/trtllm/Cargo.toml b/backends/trtllm/Cargo.toml index 7079d3d1..43a114ba 100644 --- a/backends/trtllm/Cargo.toml +++ b/backends/trtllm/Cargo.toml @@ -8,17 +8,18 @@ homepage.workspace = true [dependencies] async-trait = "0.1" async-stream = "0.3" +clap = { version = "4.5", features = ["derive"] } cxx = "1.0" +log = { version = "0.4", features = [] } text-generation-router = { path = "../../router" } tokenizers = { version = "0.19", features = ["hf-hub"] } tokio = { version = "1.38", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] } tokio-stream = "0.1.15" -clap = { version = "4.5", features = ["derive"] } thiserror = "1.0.62" tracing = "0.1" tracing-opentelemetry = "0.24" tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] } -log = { version = "0.4", features = [] } +parking_lot = "0.12" [build-dependencies] cmake = "0.1" diff --git a/backends/trtllm/src/backend.rs b/backends/trtllm/src/backend.rs index b26d06a6..6bf18472 100644 --- a/backends/trtllm/src/backend.rs +++ b/backends/trtllm/src/backend.rs @@ -2,8 +2,8 @@ use std::future::Future; use std::path::Path; use std::pin::{pin, Pin}; use std::str::FromStr; -use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock}; +use std::sync::atomic::{AtomicBool, Ordering}; use std::task::{Context, Poll}; use std::time::Duration; @@ -12,16 +12,17 @@ use cxx::UniquePtr; use log::{error, warn}; use tokenizers::Tokenizer; use tokio::sync::mpsc::{unbounded_channel, UnboundedSender}; -use tokio::sync::RwLock; -use tokio::time::{sleep, Instant}; -use tokio_stream::wrappers::UnboundedReceiverStream; +use tokio::time::{Instant, sleep}; use tokio_stream::{Stream, StreamExt}; -use tracing::{instrument, span, Level}; +use tokio_stream::wrappers::UnboundedReceiverStream; +use tracing::{instrument, Level, span}; -use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse}; -use text_generation_router::validation::ValidationError::UnsupportedModality; -use text_generation_router::validation::{Chunk, ValidGenerateRequest, ValidationError}; +// use tokio::sync::RwLock; +use parking_lot::RwLock; use text_generation_router::{FinishReason, Token}; +use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse}; +use text_generation_router::validation::{Chunk, ValidationError, ValidGenerateRequest}; +use text_generation_router::validation::ValidationError::UnsupportedModality; use crate::errors::TensorRtLlmBackendError; use crate::ffi::{create_tensorrt_llm_backend, GenerationStep, TensorRtLlmBackendImpl};