// text-generation-inference/router/src/validation.rs
use crate::validation::ValidationError::EmptyInput;
/// Payload validation logic
use crate::{GenerateParameters, GenerateRequest};
use rand::rngs::ThreadRng;
use rand::Rng;
use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters};
use thiserror::Error;
use tokenizers::tokenizer::Tokenizer;
use tokio::sync::{mpsc, oneshot};
use tracing::{instrument, Span};
/// Validation
#[derive(Debug, Clone)]
2022-10-17 16:27:33 +00:00
pub struct Validation {
2022-10-18 13:19:03 +00:00
/// Channel to communicate with the background validation task
sender: mpsc::UnboundedSender<ValidationRequest>,
}
impl Validation {
pub(crate) fn new(
workers: usize,
tokenizer: Tokenizer,
max_stop_sequences: usize,
max_input_length: usize,
max_total_tokens: usize,
) -> Self {
// Create channel
let (validation_sender, validation_receiver) = mpsc::unbounded_channel();
2022-10-18 13:19:03 +00:00
// Launch background validation task
tokio::spawn(validation_task(
workers,
tokenizer,
max_stop_sequences,
2022-10-18 13:19:03 +00:00
max_input_length,
max_total_tokens,
2022-10-18 13:19:03 +00:00
validation_receiver,
));
Self {
sender: validation_sender,
}
}
2022-10-18 13:19:03 +00:00
/// Validate a payload and get the number of tokens in the input
2023-02-13 12:02:45 +00:00
#[instrument(skip_all)]
pub(crate) async fn validate(
&self,
request: GenerateRequest,
) -> Result<ValidGenerateRequest, ValidationError> {
2022-10-18 13:19:03 +00:00
// Create response channel
let (sender, receiver) = oneshot::channel();
2022-10-18 13:19:03 +00:00
// Send request to the background validation task
// Unwrap is safe here
2023-02-13 12:02:45 +00:00
self.sender
.send((request, sender, Span::current()))
.unwrap();
2022-10-18 13:19:03 +00:00
// Await on response channel
// Unwrap is safe here
receiver.await.unwrap()
}
}
2022-10-18 13:19:03 +00:00
/// Validation task
/// Load balance the validation requests between multiple validation workers
async fn validation_task(
workers: usize,
tokenizer: Tokenizer,
max_stop_sequences: usize,
2022-10-18 13:19:03 +00:00
max_input_length: usize,
max_total_tokens: usize,
mut receiver: mpsc::UnboundedReceiver<ValidationRequest>,
2022-10-18 13:19:03 +00:00
) {
let mut workers_senders = Vec::with_capacity(workers);
// Create workers
for _ in 0..workers {
let tokenizer_clone: Tokenizer = tokenizer.clone().into();
2022-10-18 13:19:03 +00:00
// Create channel to communicate with worker
let (worker_sender, worker_receiver) = mpsc::channel(workers);
workers_senders.push(worker_sender);
// Spawn worker
tokio::task::spawn_blocking(move || {
validation_worker(
tokenizer_clone,
max_stop_sequences,
max_input_length,
max_total_tokens,
worker_receiver,
)
2022-10-18 13:19:03 +00:00
});
}
loop {
// Load balance requests between workers
for sender in workers_senders.iter() {
if let Some(validation_request) = receiver.recv().await {
sender.send(validation_request).await.unwrap();
} else {
return;
}
}
}
}
/// Check the parameters inside the payload and get the number of tokens inside the input using
/// the tokenizer
fn validation_worker(
tokenizer: Tokenizer,
max_stop_sequences: usize,
2022-10-18 13:19:03 +00:00
max_input_length: usize,
max_total_tokens: usize,
2022-10-18 13:19:03 +00:00
mut receiver: mpsc::Receiver<ValidationRequest>,
) {
// Seed rng
let mut rng = rand::thread_rng();
2022-10-18 13:19:03 +00:00
// Loop over requests
2023-02-13 12:02:45 +00:00
while let Some((request, response_tx, parent_span)) = receiver.blocking_recv() {
parent_span.in_scope(|| {
response_tx
.send(
validate(
request,
&tokenizer,
max_stop_sequences,
max_input_length,
max_total_tokens,
&mut rng,
)
.map_err(|err| {
metrics::increment_counter!("tgi_request_failure", "err" => "validation");
2023-02-13 12:02:45 +00:00
tracing::error!("{err}");
err
}),
)
.unwrap_or(())
})
}
}
fn validate(
request: GenerateRequest,
tokenizer: &Tokenizer,
max_stop_sequences: usize,
max_input_length: usize,
max_total_tokens: usize,
rng: &mut ThreadRng,
) -> Result<ValidGenerateRequest, ValidationError> {
let GenerateParameters {
temperature,
repetition_penalty,
top_k,
top_p,
do_sample,
max_new_tokens,
stop: stop_sequences,
seed,
..
} = request.parameters;
let temperature = temperature.unwrap_or(1.0);
if temperature <= 0.0 {
return Err(ValidationError::Temperature);
}
let repetition_penalty = repetition_penalty.unwrap_or(1.0);
if repetition_penalty <= 0.0 {
return Err(ValidationError::RepetitionPenalty);
}
let top_p = top_p.unwrap_or(1.0);
if top_p <= 0.0 || top_p > 1.0 {
return Err(ValidationError::TopP);
}
// Different because the proto default value is 0 while it is not a valid value
// for the user
let top_k: u32 = match top_k {
None => Ok(0),
Some(top_k) => {
if top_k <= 0 {
return Err(ValidationError::TopK);
}
Ok(top_k as u32)
}
}?;
if max_new_tokens == 0 {
return Err(ValidationError::MaxNewTokens);
}
if stop_sequences.len() > max_stop_sequences {
return Err(ValidationError::StopSequence(
max_stop_sequences,
stop_sequences.len(),
2023-01-20 11:24:39 +00:00
));
}
// If seed is None, assign a random one
let seed = match seed {
None => rng.gen(),
Some(seed) => seed,
};
// Check if inputs is empty
if request.inputs.is_empty() {
return Err(EmptyInput);
}
// Get the number of tokens in the input
match tokenizer.encode(request.inputs.clone(), true) {
Ok(encoding) => {
let input_length = encoding.len();
let total_tokens = input_length + max_new_tokens as usize;
if input_length > max_input_length {
Err(ValidationError::InputLength(max_input_length, input_length))
} else if total_tokens > max_total_tokens {
Err(ValidationError::MaxTotalTokens(
max_total_tokens,
input_length,
max_new_tokens,
))
} else {
// Return ValidGenerateRequest
let parameters = NextTokenChooserParameters {
temperature,
repetition_penalty,
top_k,
top_p,
do_sample,
seed,
};
let stopping_parameters = StoppingCriteriaParameters {
max_new_tokens,
stop_sequences,
};
metrics::histogram!("tgi_request_input_length", input_length as f64);
metrics::histogram!("tgi_request_max_new_tokens", max_new_tokens as f64);
Ok(ValidGenerateRequest {
inputs: request.inputs,
input_length: input_length as u32,
parameters,
stopping_parameters,
})
2022-11-14 16:15:19 +00:00
}
2023-01-20 11:24:39 +00:00
}
Err(err) => Err(ValidationError::Tokenizer(err.to_string())),
}
}
2022-10-18 13:19:03 +00:00
type ValidationRequest = (
GenerateRequest,
oneshot::Sender<Result<ValidGenerateRequest, ValidationError>>,
2023-02-13 12:02:45 +00:00
Span,
2022-10-18 13:19:03 +00:00
);
/// A request that passed validation, with resolved parameters and a tokenized
/// input length, ready to be queued for generation.
#[derive(Debug)]
pub(crate) struct ValidGenerateRequest {
    /// Raw input text
    pub inputs: String,
    /// Number of tokens `inputs` encodes to
    pub input_length: u32,
    /// Resolved next-token sampling parameters
    pub parameters: NextTokenChooserParameters,
    /// Stopping criteria (max_new_tokens and stop sequences)
    pub stopping_parameters: StoppingCriteriaParameters,
}
2022-10-18 13:19:03 +00:00
#[derive(Error, Debug)]
pub enum ValidationError {
2022-10-27 12:25:29 +00:00
#[error("temperature must be strictly positive")]
2022-10-18 13:19:03 +00:00
Temperature,
#[error("repetition_penalty must be strictly positive")]
RepetitionPenalty,
#[error("top_p must be > 0.0 and <= 1.0")]
2022-10-18 13:19:03 +00:00
TopP,
2022-10-27 12:25:29 +00:00
#[error("top_k must be strictly positive")]
2022-10-18 13:19:03 +00:00
TopK,
#[error("max_new_tokens must be strictly positive")]
MaxNewTokens,
#[error("input tokens + max_new_tokens must be <= {0}. Given: {1} input tokens and {2} max_new_tokens")]
MaxTotalTokens(usize, usize, u32),
#[error("inputs must have less than {0} tokens. Given: {1}")]
2022-10-21 08:59:15 +00:00
InputLength(usize, usize),
#[error("inputs cannot be empty")]
EmptyInput,
#[error("stop supports up to {0} stop sequences. Given: {1}")]
StopSequence(usize, usize),
2022-11-14 16:15:19 +00:00
#[error("tokenizer error {0}")]
Tokenizer(String),
2022-10-18 13:19:03 +00:00
}