// text-generation-inference/router/client/src/client.rs

//! Single shard Client
use crate::pb::generate::v1::text_generation_service_client::TextGenerationServiceClient;
use crate::pb::generate::v1::*;
use crate::Result;
use grpc_metadata::InjectTelemetryContext;
use tonic::transport::{Channel, Uri};
use tracing::instrument;

/// Text Generation Inference gRPC client
///
/// Thin wrapper around the generated `TextGenerationServiceClient` stub.
/// Build one with [`Client::connect`] (URI) or [`Client::connect_uds`]
/// (unix socket); every other method forwards a single RPC to the stub.
#[derive(Debug, Clone)]
pub struct Client {
    /// Generated gRPC stub that all RPC methods send their requests through
    stub: TextGenerationServiceClient<Channel>,
}

impl Client {
    /// Returns a client connected to the given url
    pub async fn connect(uri: Uri) -> Result<Self> {
        let channel = Channel::builder(uri).connect().await?;

        Ok(Self {
            stub: TextGenerationServiceClient::new(channel),
        })
    }

    /// Returns a client connected to the given unix socket
    pub async fn connect_uds(path: String) -> Result<Self> {
        let channel = Channel::from_shared("http://[::]:50051".to_string())
            .unwrap()
            .connect_with_connector(tower::service_fn(move |_: Uri| {
                tokio::net::UnixStream::connect(path.clone())
            }))
            .await?;

        Ok(Self {
            stub: TextGenerationServiceClient::new(channel),
        })
    }

    /// Returns a list of uris or unix sockets of all shards
    #[instrument(skip(self))]
    pub async fn service_discovery(&mut self) -> Result<Vec<String>> {
        let request = tonic::Request::new(ServiceDiscoveryRequest {}).inject_context();
        let response = self.stub.service_discovery(request).await?;
        let urls = response
            .into_inner()
            .urls
            .into_iter()
            // Remove unix socket prefix
            .map(|url| match url.strip_prefix("unix://") {
                None => url,
                Some(stripped_url) => stripped_url.to_string(),
            })
            .collect();
        Ok(urls)
    }

    /// Get model info
    #[instrument(skip(self))]
    pub async fn info(&mut self) -> Result<InfoResponse> {
        let request = tonic::Request::new(InfoRequest {}).inject_context();
        let response = self.stub.info(request).await?.into_inner();
        Ok(response)
    }

    /// Clear the past generations cache
    #[instrument(skip(self))]
    pub async fn clear_cache(&mut self, batch_id: Option<u64>) -> Result<()> {
        let request = tonic::Request::new(ClearCacheRequest { id: batch_id }).inject_context();
        self.stub.clear_cache(request).await?;
        Ok(())
    }

    /// Generate one token for each request in the given batch
    ///
    /// Returns Generation for each request in batch
    /// and the next cached batch
    #[instrument(skip_all, fields(id = &batch.id, size = &batch.size))]
    pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {
        let request = tonic::Request::new(PrefillRequest { batch: Some(batch) }).inject_context();
        let response = self.stub.prefill(request).await?.into_inner();
        Ok((response.generations, response.batch))
    }

    /// Generate one token for each request in the given cached batches
    ///
    /// Returns Generation for each request in batches
    /// and the next cached batch
    #[instrument(skip_all, fields(size = batches.iter().map(|batch|{batch.size}).sum::<u32>()))]
    pub async fn decode(
        &mut self,
        batches: Vec<Batch>,
    ) -> Result<(Vec<Generation>, Option<Batch>)> {
        let request = tonic::Request::new(DecodeRequest { batches }).inject_context();
        let response = self.stub.decode(request).await?.into_inner();
        Ok((response.generations, response.batch))
    }
}
v0.1.0 2022-10-18 13:19:03 +00:00			`/// Single shard Client`
Refactored gRPC interface Added validation logic 2022-10-11 14:50:54 +00:00			`use crate::pb::generate::v1::text_generation_service_client::TextGenerationServiceClient;`
Init 2022-10-08 10:30:12 +00:00			`use crate::pb::generate::v1::*;`
			`use crate::Result;`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`use grpc_metadata::InjectTelemetryContext;`
Init 2022-10-08 10:30:12 +00:00			`use tonic::transport::{Channel, Uri};`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`use tracing::instrument;`
Init 2022-10-08 10:30:12 +00:00
v0.1.0 2022-10-18 13:19:03 +00:00			`/// Text Generation Inference gRPC client`
Init 2022-10-08 10:30:12 +00:00			`#[derive(Clone)]`
			`pub struct Client {`
feat: Improve error handling 2022-10-17 12:59:00 +00:00			`stub: TextGenerationServiceClient<Channel>,`
Init 2022-10-08 10:30:12 +00:00			`}`

			`impl Client {`
feat: Improve error handling 2022-10-17 12:59:00 +00:00			`/// Returns a client connected to the given url`
			`pub async fn connect(uri: Uri) -> Result<Self> {`
			`let channel = Channel::builder(uri).connect().await?;`
Init 2022-10-08 10:30:12 +00:00
feat: Improve error handling 2022-10-17 12:59:00 +00:00			`Ok(Self {`
			`stub: TextGenerationServiceClient::new(channel),`
			`})`
Init 2022-10-08 10:30:12 +00:00			`}`

feat: Improve error handling 2022-10-17 12:59:00 +00:00			`/// Returns a client connected to the given unix socket`
			`pub async fn connect_uds(path: String) -> Result<Self> {`
Refactored gRPC interface Added validation logic 2022-10-11 14:50:54 +00:00			`let channel = Channel::from_shared("http://[::]:50051".to_string())`
Init 2022-10-08 10:30:12 +00:00			`.unwrap()`
			`.connect_with_connector(tower::service_fn(move \|_: Uri\| {`
			`tokio::net::UnixStream::connect(path.clone())`
			`}))`
feat: Improve error handling 2022-10-17 12:59:00 +00:00			`.await?;`
Init 2022-10-08 10:30:12 +00:00
feat: Improve error handling 2022-10-17 12:59:00 +00:00			`Ok(Self {`
			`stub: TextGenerationServiceClient::new(channel),`
			`})`
Init 2022-10-08 10:30:12 +00:00			`}`

v0.1.0 2022-10-18 13:19:03 +00:00			`/// Returns a list of uris or unix sockets of all shards`
Init 2022-10-08 10:30:12 +00:00			`#[instrument(skip(self))]`
			`pub async fn service_discovery(&mut self) -> Result<Vec<String>> {`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`let request = tonic::Request::new(ServiceDiscoveryRequest {}).inject_context();`
			`let response = self.stub.service_discovery(request).await?;`
Init 2022-10-08 10:30:12 +00:00			`let urls = response`
			`.into_inner()`
			`.urls`
			`.into_iter()`
v0.1.0 2022-10-18 13:19:03 +00:00			`// Remove unix socket prefix`
Init 2022-10-08 10:30:12 +00:00			`.map(\|url\| match url.strip_prefix("unix://") {`
			`None => url,`
			`Some(stripped_url) => stripped_url.to_string(),`
			`})`
			`.collect();`
			`Ok(urls)`
			`}`

feat(router): add device and dtype info (#215) 2023-04-21 13:36:29 +00:00			`/// Get model info`
			`#[instrument(skip(self))]`
			`pub async fn info(&mut self) -> Result<InfoResponse> {`
			`let request = tonic::Request::new(InfoRequest {}).inject_context();`
			`let response = self.stub.info(request).await?.into_inner();`
			`Ok(response)`
			`}`

v0.1.0 2022-10-18 13:19:03 +00:00			`/// Clear the past generations cache`
Init 2022-10-08 10:30:12 +00:00			`#[instrument(skip(self))]`
feat(server): clear cache on error (#143) 2023-03-28 09:29:35 +00:00			`pub async fn clear_cache(&mut self, batch_id: Option<u64>) -> Result<()> {`
			`let request = tonic::Request::new(ClearCacheRequest { id: batch_id }).inject_context();`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`self.stub.clear_cache(request).await?;`
Init 2022-10-08 10:30:12 +00:00			`Ok(())`
			`}`

v0.1.0 2022-10-18 13:19:03 +00:00			`/// Generate one token for each request in the given batch`
			`///`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`/// Returns Generation for each request in batch`
v0.1.0 2022-10-18 13:19:03 +00:00			`/// and the next cached batch`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`#[instrument(skip_all, fields(id = &batch.id, size = &batch.size))]`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`let request = tonic::Request::new(PrefillRequest { batch: Some(batch) }).inject_context();`
			`let response = self.stub.prefill(request).await?.into_inner();`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`Ok((response.generations, response.batch))`
Init 2022-10-08 10:30:12 +00:00			`}`

feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`/// Generate one token for each request in the given cached batches`
v0.1.0 2022-10-18 13:19:03 +00:00			`///`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`/// Returns Generation for each request in batches`
v0.1.0 2022-10-18 13:19:03 +00:00			`/// and the next cached batch`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`#[instrument(skip_all, fields(size = batches.iter().map(\|batch\|{batch.size}).sum::<u32>()))]`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`pub async fn decode(`
Init 2022-10-08 10:30:12 +00:00			`&mut self,`
Refactored gRPC interface Added validation logic 2022-10-11 14:50:54 +00:00			`batches: Vec<Batch>,`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`) -> Result<(Vec<Generation>, Option<Batch>)> {`
feat: add distributed tracing (#62) 2023-02-13 12:02:45 +00:00			`let request = tonic::Request::new(DecodeRequest { batches }).inject_context();`
			`let response = self.stub.decode(request).await?.into_inner();`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 16:04:00 +00:00			`Ok((response.generations, response.batch))`
Refactored gRPC interface Added validation logic 2022-10-11 14:50:54 +00:00			`}`
Init 2022-10-08 10:30:12 +00:00			`}`