diff --git a/backends/trtllm/src/backend.rs b/backends/trtllm/src/backend.rs index 5d8f53b6..296c7373 100644 --- a/backends/trtllm/src/backend.rs +++ b/backends/trtllm/src/backend.rs @@ -41,7 +41,6 @@ pub struct GenerationContext { tokens: Vec, done: Arc, start: Instant, - span: Span, } impl Stream for Generation { @@ -94,6 +93,9 @@ unsafe impl Sync for TensorRtLlmBackendImpl {} /// Implements the logic to execute generation with TensorRT-LLM executor API in background pub struct TensorRtLlmBackend { tokenizer: Arc, + + // Backing the backend behind a RwLock to allow concurrent read access to retrieve + // the number of available tokens (read only) in the Generation stream backend: Arc>>, } @@ -140,8 +142,8 @@ impl TensorRtLlmBackend { temperature: f32, seed: u64, ) { - let tokenizer = self.tokenizer.clone(); - let executor = self.backend.clone(); + let tokenizer = Arc::clone(&self.tokenizer); + let executor = Arc::clone(&self.backend); // Let's push this in async context tokio::spawn(async move { @@ -155,11 +157,10 @@ impl TensorRtLlmBackend { // TODO(asap): Do we really need so many shared-ownership? let ctx = Box::new(GenerationContext { sender: sender.clone(), - tokenizer: tokenizer.clone(), + tokenizer, tokens: vec![], done: Arc::clone(&generation.done), start: Instant::now(), - span: Span::current(), }); // We are leaking the context on-purpose to avoid the box being dropped while there are