2024-07-01 13:53:23 +00:00
|
|
|
use std::path::Path;
|
|
|
|
|
|
|
|
use async_trait::async_trait;
|
|
|
|
use cxx::UniquePtr;
|
2024-06-30 21:37:20 +00:00
|
|
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
|
|
|
|
|
|
use text_generation_router::infer::{Backend, InferError, InferStreamResponse};
|
|
|
|
use text_generation_router::validation::ValidGenerateRequest;
|
|
|
|
|
2024-07-01 13:53:23 +00:00
|
|
|
use crate::errors::TensorRtLlmBackendError;
|
|
|
|
use crate::ffi::{create_trtllm_backend, TensorRtLlmBackend};
|
|
|
|
|
|
|
|
pub struct TrtLLmBackend {
|
|
|
|
inner: UniquePtr<TensorRtLlmBackend>,
|
|
|
|
}
|
|
|
|
|
|
|
|
unsafe impl Sync for TrtLLmBackend {}
|
|
|
|
unsafe impl Send for TrtLLmBackend {}
|
|
|
|
|
|
|
|
impl TrtLLmBackend {
|
|
|
|
pub fn new<P: AsRef<Path>>(engine_folder: P) -> Result<Self, TensorRtLlmBackendError> {
|
|
|
|
let engine_folder = engine_folder.as_ref();
|
|
|
|
let inner = create_trtllm_backend(engine_folder.to_str().unwrap());
|
2024-06-30 21:37:20 +00:00
|
|
|
|
2024-07-01 13:53:23 +00:00
|
|
|
Ok(Self { inner })
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[async_trait]
|
|
|
|
impl Backend for TrtLLmBackend {
|
2024-06-30 21:37:20 +00:00
|
|
|
fn schedule(
|
|
|
|
&self,
|
2024-07-01 13:53:23 +00:00
|
|
|
_request: ValidGenerateRequest,
|
2024-06-30 21:37:20 +00:00
|
|
|
) -> Result<UnboundedReceiverStream<Result<InferStreamResponse, InferError>>, InferError> {
|
|
|
|
todo!()
|
|
|
|
}
|
|
|
|
|
2024-07-01 13:53:23 +00:00
|
|
|
async fn health(&self, _current_health: bool) -> bool {
|
|
|
|
true
|
2024-06-30 21:37:20 +00:00
|
|
|
}
|
|
|
|
}
|