text-generation-inference/router/client/src/sharded_client.rs

/// Multi shard Client
use crate::Result;
use crate::{Batch, Client, Generation};
use futures::future::join_all;
use tonic::transport::Uri;
use tracing::instrument;

/// Text Generation Inference gRPC multi client
pub struct ShardedClient {
    clients: Vec<Client>,
}

impl ShardedClient {
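    // The batching methods below (`clear_cache`, `prefill`, `decode`) fan their
    // calls out to every shard client with `join_all`. All shards serve the same
    // model (one tensor-parallel rank each), so they return the same message and
    // keeping a single response is enough.
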
    fn new(clients: Vec<Client>) -> Self {
        Self { clients }
    }

    /// Create a new ShardedClient from a master client. The master client will communicate with
    /// the other shards and return all uris/unix sockets with the `service_discovery` gRPC method.
    async fn from_master_client(mut master_client: Client) -> Result<Self> {
        // Get all uris/unix sockets from the master client
        let uris = master_client.service_discovery().await.unwrap();
        let futures = uris.into_iter().map(Client::connect_uds);
        let clients: Result<Vec<Client>> = join_all(futures).await.into_iter().collect();
        Ok(Self::new(clients?))
    }

    /// Returns a client connected to the given uri
    pub async fn connect(uri: Uri) -> Result<Self> {
        let master_client = Client::connect(uri).await?;
        Self::from_master_client(master_client).await
    }

    /// Returns a client connected to the given unix socket
    pub async fn connect_uds(path: String) -> Result<Self> {
        let master_client = Client::connect_uds(path).await?;
        Self::from_master_client(master_client).await
    }

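    // A minimal usage sketch (the socket path is only illustrative; the router is
    // normally pointed at the master shard's unix socket and service discovery
    // finds the remaining shards):
    //
    //     let mut sharded_client = ShardedClient::connect_uds("/tmp/text-generation-0".to_string()).await?;
    //     sharded_client.clear_cache(None).await?;
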
    /// Clear the past generations cache
    #[instrument(skip(self))]
    pub async fn clear_cache(&mut self, batch_id: Option<u64>) -> Result<()> {
        let futures: Vec<_> = self
            .clients
            .iter_mut()
            .map(|client| client.clear_cache(batch_id))
            .collect();
        join_all(futures).await.into_iter().collect()
    }

    /// Generate one token for each request in the given batch
    ///
    /// Returns Generation for each request in batch
    /// and the next cached batch
    #[instrument(skip_all, fields(id = &batch.id, size = &batch.size))]
    pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {
        let futures: Vec<_> = self
            .clients
            .iter_mut()
            .map(|client| Box::pin(client.prefill(batch.clone())))
            .collect();
        // all shards return the same message
        join_all(futures).await.pop().unwrap()
    }

    /// Generate one token for each request in the given cached batches
    ///
    /// Returns Generation for each request in batches
    /// and the next cached batch
    #[instrument(skip_all, fields(size = batches.iter().map(|batch|{batch.size}).sum::<u32>()))]
    pub async fn decode(
        &mut self,
        batches: Vec<Batch>,
    ) -> Result<(Vec<Generation>, Option<Batch>)> {
        let futures: Vec<_> = self
            .clients
            .iter_mut()
            .map(|client| Box::pin(client.decode(batches.clone())))
            .collect();
        // all shards return the same message
        join_all(futures).await.pop().unwrap()
    }
}
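
// Hypothetical driving loop, continuing the usage sketch above (Batch construction
// and handling of the returned Generations are elided):
//
//     let (generations, mut cached_batch) = sharded_client.prefill(batch).await?;
//     while let Some(batch) = cached_batch {
//         let (generations, next_batch) = sharded_client.decode(vec![batch]).await?;
//         cached_batch = next_batch;
//     }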