2023-02-15 20:56:59 +00:00
|
|
|
use crate::validation::ValidationError::EmptyInput;
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Payload validation logic
|
2023-01-31 16:04:00 +00:00
|
|
|
use crate::{GenerateParameters, GenerateRequest};
|
2023-01-31 15:01:15 +00:00
|
|
|
use rand::rngs::ThreadRng;
|
|
|
|
use rand::Rng;
|
2023-01-31 16:04:00 +00:00
|
|
|
use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters};
|
2022-10-17 12:59:00 +00:00
|
|
|
use thiserror::Error;
|
2022-10-11 14:50:54 +00:00
|
|
|
use tokenizers::tokenizer::Tokenizer;
|
|
|
|
use tokio::sync::{mpsc, oneshot};
|
2023-02-13 12:02:45 +00:00
|
|
|
use tracing::{instrument, Span};
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Validation
///
/// Cheaply cloneable handle to the background validation task; cloning this
/// struct only clones the channel sender, not the workers themselves.
#[derive(Debug, Clone)]
pub struct Validation {
    /// Channel to communicate with the background validation task
    sender: mpsc::UnboundedSender<ValidationRequest>,
}
|
|
|
|
|
|
|
|
impl Validation {
|
2023-02-15 20:56:59 +00:00
|
|
|
pub(crate) fn new(
|
|
|
|
workers: usize,
|
|
|
|
tokenizer: Tokenizer,
|
|
|
|
max_stop_sequences: usize,
|
|
|
|
max_input_length: usize,
|
|
|
|
max_total_tokens: usize,
|
|
|
|
) -> Self {
|
2023-02-02 13:59:27 +00:00
|
|
|
// Create channel
|
2023-02-16 16:18:53 +00:00
|
|
|
let (validation_sender, validation_receiver) = mpsc::unbounded_channel();
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
// Launch background validation task
|
|
|
|
tokio::spawn(validation_task(
|
|
|
|
workers,
|
|
|
|
tokenizer,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_stop_sequences,
|
2022-10-18 13:19:03 +00:00
|
|
|
max_input_length,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_total_tokens,
|
2022-10-18 13:19:03 +00:00
|
|
|
validation_receiver,
|
|
|
|
));
|
2022-10-11 14:50:54 +00:00
|
|
|
|
|
|
|
Self {
|
|
|
|
sender: validation_sender,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Validate a payload and get the number of tokens in the input
|
2023-02-13 12:02:45 +00:00
|
|
|
#[instrument(skip_all)]
|
2022-10-11 14:50:54 +00:00
|
|
|
pub(crate) async fn validate(
|
|
|
|
&self,
|
|
|
|
request: GenerateRequest,
|
2023-01-31 16:04:00 +00:00
|
|
|
) -> Result<ValidGenerateRequest, ValidationError> {
|
2022-10-18 13:19:03 +00:00
|
|
|
// Create response channel
|
2022-10-11 14:50:54 +00:00
|
|
|
let (sender, receiver) = oneshot::channel();
|
2022-10-18 13:19:03 +00:00
|
|
|
// Send request to the background validation task
|
|
|
|
// Unwrap is safe here
|
2023-02-13 12:02:45 +00:00
|
|
|
self.sender
|
|
|
|
.send((request, sender, Span::current()))
|
|
|
|
.unwrap();
|
2022-10-18 13:19:03 +00:00
|
|
|
// Await on response channel
|
|
|
|
// Unwrap is safe here
|
2022-10-11 14:50:54 +00:00
|
|
|
receiver.await.unwrap()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Validation task
|
|
|
|
/// Load balance the validation requests between multiple validation workers
|
|
|
|
async fn validation_task(
|
|
|
|
workers: usize,
|
|
|
|
tokenizer: Tokenizer,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_stop_sequences: usize,
|
2022-10-18 13:19:03 +00:00
|
|
|
max_input_length: usize,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_total_tokens: usize,
|
2023-02-16 16:18:53 +00:00
|
|
|
mut receiver: mpsc::UnboundedReceiver<ValidationRequest>,
|
2022-10-18 13:19:03 +00:00
|
|
|
) {
|
|
|
|
let mut workers_senders = Vec::with_capacity(workers);
|
|
|
|
|
|
|
|
// Create workers
|
|
|
|
for _ in 0..workers {
|
2023-01-03 09:41:22 +00:00
|
|
|
let tokenizer_clone: Tokenizer = tokenizer.clone().into();
|
2022-10-18 13:19:03 +00:00
|
|
|
// Create channel to communicate with worker
|
|
|
|
let (worker_sender, worker_receiver) = mpsc::channel(workers);
|
|
|
|
workers_senders.push(worker_sender);
|
|
|
|
|
|
|
|
// Spawn worker
|
|
|
|
tokio::task::spawn_blocking(move || {
|
2023-02-15 20:56:59 +00:00
|
|
|
validation_worker(
|
|
|
|
tokenizer_clone,
|
|
|
|
max_stop_sequences,
|
|
|
|
max_input_length,
|
|
|
|
max_total_tokens,
|
|
|
|
worker_receiver,
|
|
|
|
)
|
2022-10-18 13:19:03 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
loop {
|
|
|
|
// Load balance requests between workers
|
|
|
|
for sender in workers_senders.iter() {
|
|
|
|
if let Some(validation_request) = receiver.recv().await {
|
|
|
|
sender.send(validation_request).await.unwrap();
|
|
|
|
} else {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check the parameters inside the payload and get the number of tokens inside the input using
|
|
|
|
/// the tokenizer
|
|
|
|
fn validation_worker(
|
2023-01-03 09:41:22 +00:00
|
|
|
tokenizer: Tokenizer,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_stop_sequences: usize,
|
2022-10-18 13:19:03 +00:00
|
|
|
max_input_length: usize,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_total_tokens: usize,
|
2022-10-18 13:19:03 +00:00
|
|
|
mut receiver: mpsc::Receiver<ValidationRequest>,
|
|
|
|
) {
|
2023-01-31 15:01:15 +00:00
|
|
|
// Seed rng
|
|
|
|
let mut rng = rand::thread_rng();
|
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
// Loop over requests
|
2023-02-13 12:02:45 +00:00
|
|
|
while let Some((request, response_tx, parent_span)) = receiver.blocking_recv() {
|
|
|
|
parent_span.in_scope(|| {
|
|
|
|
response_tx
|
|
|
|
.send(
|
2023-02-15 20:56:59 +00:00
|
|
|
validate(
|
|
|
|
request,
|
|
|
|
&tokenizer,
|
|
|
|
max_stop_sequences,
|
|
|
|
max_input_length,
|
|
|
|
max_total_tokens,
|
|
|
|
&mut rng,
|
|
|
|
)
|
|
|
|
.map_err(|err| {
|
2023-02-16 16:18:53 +00:00
|
|
|
metrics::increment_counter!("tgi_request_failure", "err" => "validation");
|
2023-02-13 12:02:45 +00:00
|
|
|
tracing::error!("{err}");
|
|
|
|
err
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap_or(())
|
|
|
|
})
|
2023-01-03 09:41:22 +00:00
|
|
|
}
|
|
|
|
}
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2023-01-03 09:41:22 +00:00
|
|
|
/// Validate a single request: check every sampling/stopping parameter against
/// its allowed range and the configured limits, count the input tokens with
/// the tokenizer, and build the `ValidGenerateRequest` forwarded to the model
/// server. Checks run in a fixed order; the first failing one determines the
/// returned `ValidationError`.
fn validate(
    request: GenerateRequest,
    tokenizer: &Tokenizer,
    max_stop_sequences: usize,
    max_input_length: usize,
    max_total_tokens: usize,
    rng: &mut ThreadRng,
) -> Result<ValidGenerateRequest, ValidationError> {
    // Destructure the user-supplied parameters; `..` skips fields this
    // function does not validate
    let GenerateParameters {
        temperature,
        repetition_penalty,
        top_k,
        top_p,
        typical_p,
        do_sample,
        max_new_tokens,
        stop: stop_sequences,
        seed,
        watermark,
        ..
    } = request.parameters;

    // Temperature defaults to 1.0 (no-op) and must be strictly positive
    let temperature = temperature.unwrap_or(1.0);
    if temperature <= 0.0 {
        return Err(ValidationError::Temperature);
    }

    // Repetition penalty defaults to 1.0 (no-op) and must be strictly positive
    let repetition_penalty = repetition_penalty.unwrap_or(1.0);
    if repetition_penalty <= 0.0 {
        return Err(ValidationError::RepetitionPenalty);
    }

    // Different because the proto default value is not a valid value
    // for the user
    let top_p = top_p
        .map(|value| {
            if value <= 0.0 || value >= 1.0 {
                return Err(ValidationError::TopP);
            }
            Ok(value)
        })
        .unwrap_or(Ok(1.0))?;

    // Same open-interval (0, 1) constraint as top_p; 1.0 disables it
    let typical_p = typical_p
        .map(|value| {
            if value <= 0.0 || value >= 1.0 {
                return Err(ValidationError::TypicalP);
            }
            Ok(value)
        })
        .unwrap_or(Ok(1.0))?;

    // top_k must be strictly positive when set; 0 means "disabled" and is the
    // default forwarded to the model server
    let top_k: u32 = top_k
        .map(|value| {
            if value <= 0 {
                return Err(ValidationError::TopK);
            }
            Ok(value as u32)
        })
        .unwrap_or(Ok(0))?;

    // A request must ask for at least one new token
    if max_new_tokens == 0 {
        return Err(ValidationError::MaxNewTokens);
    }

    // Bounded number of stop sequences per request
    if stop_sequences.len() > max_stop_sequences {
        return Err(ValidationError::StopSequence(
            max_stop_sequences,
            stop_sequences.len(),
        ));
    }

    // If seed is None, assign a random one
    let seed = match seed {
        None => rng.gen(),
        Some(seed) => seed,
    };

    // Check if inputs is empty
    if request.inputs.is_empty() {
        return Err(EmptyInput);
    }

    // Get the number of tokens in the input
    // (clone is needed because `request.inputs` is moved into the result below)
    match tokenizer.encode(request.inputs.clone(), true) {
        Ok(encoding) => {
            let input_length = encoding.len();
            // Total budget = prompt tokens + requested new tokens
            let total_tokens = input_length + max_new_tokens as usize;

            if input_length > max_input_length {
                Err(ValidationError::InputLength(max_input_length, input_length))
            } else if total_tokens > max_total_tokens {
                Err(ValidationError::MaxTotalTokens(
                    max_total_tokens,
                    input_length,
                    max_new_tokens,
                ))
            } else {
                // Return ValidGenerateRequest
                let parameters = NextTokenChooserParameters {
                    temperature,
                    repetition_penalty,
                    top_k,
                    top_p,
                    typical_p,
                    do_sample,
                    seed,
                    watermark,
                };
                let stopping_parameters = StoppingCriteriaParameters {
                    max_new_tokens,
                    stop_sequences,
                };

                // Only successfully validated requests are recorded in metrics
                metrics::histogram!("tgi_request_input_length", input_length as f64);
                metrics::histogram!("tgi_request_max_new_tokens", max_new_tokens as f64);

                Ok(ValidGenerateRequest {
                    inputs: request.inputs,
                    input_length: input_length as u32,
                    parameters,
                    stopping_parameters,
                })
            }
        }
        Err(err) => Err(ValidationError::Tokenizer(err.to_string())),
    }
}
|
2022-10-18 13:19:03 +00:00
|
|
|
|
|
|
|
/// Message sent to the validation task: the payload to validate, a one-shot
/// channel to answer on, and the caller's tracing span.
type ValidationRequest = (
    GenerateRequest,
    oneshot::Sender<Result<ValidGenerateRequest, ValidationError>>,
    Span,
);
|
|
|
|
|
2023-01-31 16:04:00 +00:00
|
|
|
/// A request that passed validation, ready to be queued for generation
#[derive(Debug)]
pub(crate) struct ValidGenerateRequest {
    /// Raw input text
    pub inputs: String,
    /// Number of tokens in `inputs`, as counted by the tokenizer
    pub input_length: u32,
    /// Sampling parameters forwarded to the model server
    pub parameters: NextTokenChooserParameters,
    /// Stopping criteria forwarded to the model server
    pub stopping_parameters: StoppingCriteriaParameters,
}
|
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Reasons a payload can fail validation; the `#[error]` strings are the
/// user-facing messages returned to the client.
#[derive(Error, Debug)]
pub enum ValidationError {
    #[error("`temperature` must be strictly positive")]
    Temperature,
    #[error("`repetition_penalty` must be strictly positive")]
    RepetitionPenalty,
    #[error("`top_p` must be > 0.0 and < 1.0")]
    TopP,
    #[error("`top_k` must be strictly positive")]
    TopK,
    #[error("`typical_p` must be > 0.0 and < 1.0")]
    TypicalP,
    #[error("`max_new_tokens` must be strictly positive")]
    MaxNewTokens,
    // (limit, input tokens, requested new tokens)
    #[error("`inputs` tokens + `max_new_tokens` must be <= {0}. Given: {1} `inputs` tokens and {2} `max_new_tokens`")]
    MaxTotalTokens(usize, usize, u32),
    // (limit, actual input tokens)
    #[error("`inputs` must have less than {0} tokens. Given: {1}")]
    InputLength(usize, usize),
    #[error("`inputs` cannot be empty")]
    EmptyInput,
    // (limit, actual number of stop sequences)
    #[error("`stop` supports up to {0} stop sequences. Given: {1}")]
    StopSequence(usize, usize),
    // Wraps the tokenizer's own error message
    #[error("tokenizer error {0}")]
    Tokenizer(String),
}
|