text-generation-inference/backends/trtllm/src/backend.rs

use std::path::Path;

use async_trait::async_trait;
use cxx::UniquePtr;
use tokio_stream::wrappers::UnboundedReceiverStream;

use text_generation_router::infer::{Backend, InferError, InferStreamResponse};
use text_generation_router::validation::ValidGenerateRequest;

use crate::errors::TensorRtLlmBackendError;
use crate::ffi::{create_trtllm_backend, TensorRtLlmBackend};

/// Rust-side handle for the TensorRT-LLM backend exposed through the `cxx` bridge.
///
/// Instances are built with [`TrtLLmBackend::new`]; the type implements the router's
/// [`Backend`] trait.
pub struct TrtLLmBackend {
    /// Owning `cxx` pointer to the foreign `TensorRtLlmBackend` object, as returned by
    /// [`create_trtllm_backend`].
    inner: UniquePtr<TensorRtLlmBackend>,
}

// SAFETY: NOTE(review) — these impls unconditionally promise that a `TrtLLmBackend`
// (i.e. the wrapped `UniquePtr<TensorRtLlmBackend>`) may be moved to and shared between
// threads. Nothing in this file establishes that; the guarantee has to come from the
// foreign implementation behind the pointer — TODO confirm before relying on it.
// Presumably they exist because the `Backend` trait requires `Send + Sync`; verify
// against the trait definition.
unsafe impl Sync for TrtLLmBackend {}
unsafe impl Send for TrtLLmBackend {}

impl TrtLLmBackend {
    /// Creates a backend from the engine folder at `engine_folder`.
    ///
    /// The path is converted to a UTF-8 string and handed to the FFI constructor
    /// [`create_trtllm_backend`]; the pointer it returns is stored as-is.
    ///
    /// # Errors
    ///
    /// The signature allows a [`TensorRtLlmBackendError`], but this implementation
    /// currently never produces one — it always returns `Ok`.
    ///
    /// # Panics
    ///
    /// Panics if `engine_folder` is not valid UTF-8, because the path-to-string
    /// conversion is unwrapped.
    pub fn new<P: AsRef<Path>>(engine_folder: P) -> Result<Self, TensorRtLlmBackendError> {
        // The FFI boundary takes a `&str`, hence the lossless UTF-8 conversion.
        let folder_utf8 = engine_folder.as_ref().to_str().unwrap();

        Ok(Self {
            inner: create_trtllm_backend(folder_utf8),
        })
    }
}

#[async_trait]
impl Backend for TrtLLmBackend {
    /// `Backend::schedule` entry point: takes a validated generate request and, per the
    /// signature, is meant to return a stream of inference responses.
    ///
    /// Not implemented yet — the body is `todo!()` and the request is unused.
    ///
    /// # Panics
    ///
    /// Always panics until an implementation replaces the `todo!()`.
    fn schedule(
        &self,
        _request: ValidGenerateRequest,
    ) -> Result<UnboundedReceiverStream<Result<InferStreamResponse, InferError>>, InferError> {
        todo!()
    }

    /// `Backend::health` entry point.
    ///
    /// Placeholder: returns `true` unconditionally. `_current_health` is ignored and the
    /// wrapped backend is not consulted.
    async fn health(&self, _current_health: bool) -> bool {
        true
    }
}
Working FFI call for TGI and TRTLLM backend 2024-07-01 13:53:23 +00:00			`use std::path::Path;`

			`use async_trait::async_trait;`
			`use cxx::UniquePtr;`
Initial setup for CXX binding to TRTLLM 2024-06-30 21:37:20 +00:00			`use tokio_stream::wrappers::UnboundedReceiverStream;`

			`use text_generation_router::infer::{Backend, InferError, InferStreamResponse};`
			`use text_generation_router::validation::ValidGenerateRequest;`

Working FFI call for TGI and TRTLLM backend 2024-07-01 13:53:23 +00:00			`use crate::errors::TensorRtLlmBackendError;`
			`use crate::ffi::{create_trtllm_backend, TensorRtLlmBackend};`

			`pub struct TrtLLmBackend {`
			`inner: UniquePtr<TensorRtLlmBackend>,`
			`}`

			`unsafe impl Sync for TrtLLmBackend {}`
			`unsafe impl Send for TrtLLmBackend {}`

			`impl TrtLLmBackend {`
			`pub fn new<P: AsRef<Path>>(engine_folder: P) -> Result<Self, TensorRtLlmBackendError> {`
			`let engine_folder = engine_folder.as_ref();`
			`let inner = create_trtllm_backend(engine_folder.to_str().unwrap());`
Initial setup for CXX binding to TRTLLM 2024-06-30 21:37:20 +00:00
Working FFI call for TGI and TRTLLM backend 2024-07-01 13:53:23 +00:00			`Ok(Self { inner })`
			`}`
			`}`

			`#[async_trait]`
			`impl Backend for TrtLLmBackend {`
Initial setup for CXX binding to TRTLLM 2024-06-30 21:37:20 +00:00			`fn schedule(`
			`&self,`
Working FFI call for TGI and TRTLLM backend 2024-07-01 13:53:23 +00:00			`_request: ValidGenerateRequest,`
Initial setup for CXX binding to TRTLLM 2024-06-30 21:37:20 +00:00			`) -> Result<UnboundedReceiverStream<Result<InferStreamResponse, InferError>>, InferError> {`
			`todo!()`
			`}`

Working FFI call for TGI and TRTLLM backend 2024-07-01 13:53:23 +00:00			`async fn health(&self, _current_health: bool) -> bool {`
			`true`
Initial setup for CXX binding to TRTLLM 2024-06-30 21:37:20 +00:00			`}`
			`}`