2022-10-18 13:19:03 +00:00
|
|
|
/// Payload validation logic
|
2023-07-13 12:22:37 +00:00
|
|
|
use crate::validation::ValidationError::{BestOfSampling, BestOfSeed, EmptyInput};
|
2023-01-31 16:04:00 +00:00
|
|
|
use crate::{GenerateParameters, GenerateRequest};
|
2023-04-09 18:22:27 +00:00
|
|
|
use rand::{thread_rng, Rng};
|
2024-02-29 09:56:26 +00:00
|
|
|
use std::env;
|
2023-01-31 16:04:00 +00:00
|
|
|
use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters};
|
2022-10-17 12:59:00 +00:00
|
|
|
use thiserror::Error;
|
2022-10-11 14:50:54 +00:00
|
|
|
use tokenizers::tokenizer::Tokenizer;
|
2023-03-09 12:10:30 +00:00
|
|
|
use tokenizers::TruncationDirection;
|
2023-10-23 13:51:12 +00:00
|
|
|
use tokio::sync::mpsc;
|
2023-04-09 18:22:27 +00:00
|
|
|
use tokio::sync::oneshot;
|
2023-02-13 12:02:45 +00:00
|
|
|
use tracing::{instrument, Span};
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Validation
///
/// Checks incoming generation requests against the server limits and, when a
/// fast tokenizer is available, delegates input measurement/truncation to a
/// pool of background tokenization workers.
#[derive(Debug, Clone)]
pub struct Validation {
    /// Validation parameters
    // Maximum accepted value for `best_of`
    max_best_of: usize,
    // Maximum number of stop sequences a request may define
    max_stop_sequences: usize,
    // Maximum accepted value for `top_n_tokens`
    max_top_n_tokens: u32,
    // Maximum number of input tokens per request
    max_input_length: usize,
    // Maximum of input tokens + generated tokens per request
    max_total_tokens: usize,
    /// Channel to communicate with the background tokenization task
    // `None` when no fast tokenizer was provided: inputs are then validated
    // without being tokenized on the router side
    sender: Option<mpsc::UnboundedSender<TokenizerRequest>>,
    // Set from the SKIP_TOKENIZER_IN_TGI env var; when true the input is
    // treated as pre-tokenized (see `validate_input`)
    skip_tokenizer_in_tgi: bool,
}
|
|
|
|
|
|
|
|
impl Validation {
|
2023-02-15 20:56:59 +00:00
|
|
|
pub(crate) fn new(
|
|
|
|
workers: usize,
|
2023-04-09 18:22:27 +00:00
|
|
|
tokenizer: Option<Tokenizer>,
|
2023-03-09 14:30:54 +00:00
|
|
|
max_best_of: usize,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_stop_sequences: usize,
|
2023-08-28 09:43:47 +00:00
|
|
|
max_top_n_tokens: u32,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_input_length: usize,
|
|
|
|
max_total_tokens: usize,
|
|
|
|
) -> Self {
|
2023-04-09 18:22:27 +00:00
|
|
|
// If we have a fast tokenizer
|
|
|
|
let sender = if let Some(tokenizer) = tokenizer {
|
2023-10-23 13:51:12 +00:00
|
|
|
// Create round robin channel
|
|
|
|
let (validation_sender, validation_round_robin_receiver) = mpsc::unbounded_channel();
|
|
|
|
let mut senders = Vec::with_capacity(workers);
|
2023-04-09 18:22:27 +00:00
|
|
|
|
|
|
|
// Create workers
|
|
|
|
for _ in 0..workers {
|
|
|
|
let tokenizer_clone = tokenizer.clone();
|
2023-10-23 13:51:12 +00:00
|
|
|
let (tokenizer_sender, tokenizer_receiver) = mpsc::unbounded_channel();
|
|
|
|
senders.push(tokenizer_sender);
|
2023-04-09 18:22:27 +00:00
|
|
|
|
|
|
|
// Spawn worker
|
|
|
|
tokio::task::spawn_blocking(move || {
|
2023-10-23 13:51:12 +00:00
|
|
|
tokenizer_worker(tokenizer_clone, tokenizer_receiver)
|
2023-04-09 18:22:27 +00:00
|
|
|
});
|
|
|
|
}
|
2023-10-23 13:51:12 +00:00
|
|
|
|
|
|
|
// Create tokenization round robin task
|
|
|
|
tokio::spawn(round_robin_task(validation_round_robin_receiver, senders));
|
|
|
|
|
2023-04-09 18:22:27 +00:00
|
|
|
Some(validation_sender)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
};
|
|
|
|
|
2024-02-29 09:56:26 +00:00
|
|
|
let skip_tokenizer_in_tgi = env::var("SKIP_TOKENIZER_IN_TGI")
|
|
|
|
.ok()
|
|
|
|
.map_or(false, |value| value.to_lowercase() == "true");
|
|
|
|
|
2023-04-09 18:22:27 +00:00
|
|
|
Self {
|
|
|
|
max_best_of,
|
|
|
|
sender,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_stop_sequences,
|
2023-08-28 09:43:47 +00:00
|
|
|
max_top_n_tokens,
|
2022-10-18 13:19:03 +00:00
|
|
|
max_input_length,
|
2023-02-15 20:56:59 +00:00
|
|
|
max_total_tokens,
|
2024-02-29 09:56:26 +00:00
|
|
|
skip_tokenizer_in_tgi,
|
2023-04-09 18:22:27 +00:00
|
|
|
}
|
|
|
|
}
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2023-11-20 09:33:44 +00:00
|
|
|
/// Validate the raw input string and resolve the effective
/// `(inputs, input_length, max_new_tokens)` triple.
///
/// Two paths exist:
/// - tokenizer available: the input is measured/truncated by a background
///   worker, then checked against `max_input_length` / `max_total_tokens`;
/// - no tokenizer: only `truncate + max_new_tokens <= max_total_tokens`
///   can be enforced (the python servers truncate the input later).
#[instrument(skip(self, inputs))]
async fn validate_input(
    &self,
    inputs: String,
    truncate: Option<usize>,
    max_new_tokens: Option<u32>,
) -> Result<(String, usize, u32), ValidationError> {
    // If we have a fast tokenizer
    if let Some(sender) = &self.sender {
        // Create response channel
        let (response_sender, response_receiver) = oneshot::channel();
        // Send request to the background validation task
        // Unwrap is safe here: the worker tasks hold the receivers for the
        // lifetime of the process, so the channel cannot be closed
        sender
            .send(((inputs, truncate), response_sender, Span::current()))
            .unwrap();

        // Await on response channel
        // Unwrap is safe here
        let (inputs, input_length) = response_receiver.await.unwrap()?;

        // NOTE(review): this shadows the `input_length` reported by the
        // tokenizer above. When SKIP_TOKENIZER_IN_TGI is set the input is
        // presumably a comma-separated list of token ids (length = id count);
        // otherwise the length is pinned to `truncate`/`max_input_length`
        // regardless of the actual token count — confirm this is the intended
        // padding behavior.
        let input_length = if self.skip_tokenizer_in_tgi {
            inputs.chars().filter(|&c| c == ',').count() + 1
        } else {
            truncate.unwrap_or(self.max_input_length)
        };

        // Get total tokens
        // If the caller did not set `max_new_tokens`, default to all the
        // budget left after the input
        let max_new_tokens: u32 = if let Some(max_new_tokens) = max_new_tokens {
            max_new_tokens
        } else {
            self.max_total_tokens.saturating_sub(input_length) as u32
        };
        let total_tokens = input_length + max_new_tokens as usize;

        // Validate MaxTotalTokens
        if total_tokens > self.max_total_tokens {
            return Err(ValidationError::MaxTotalTokens(
                self.max_total_tokens,
                input_length,
                max_new_tokens,
            ));
        }

        // Validate InputLength
        if input_length > self.max_input_length {
            return Err(ValidationError::InputLength(
                self.max_input_length,
                input_length,
            ));
        }

        metrics::histogram!("tgi_request_input_length", input_length as f64);
        Ok((inputs, input_length, max_new_tokens))
    }
    // Return inputs without validation
    else {
        // In this case, we don't know the real length in tokens of the inputs
        // However, the inputs will be truncated by the python servers
        // We make sure that truncate + max_new_tokens <= self.max_total_tokens
        let max_new_tokens: u32 = if let Some(max_new_tokens) = max_new_tokens {
            max_new_tokens
        } else if let Some(truncate) = truncate {
            self.max_total_tokens.saturating_sub(truncate) as u32
        } else {
            // Without a tokenizer we cannot derive a budget from the input,
            // so one of the two bounds must be explicit
            return Err(ValidationError::UnsetMaxNewTokens);
        };
        // Same length heuristic as the tokenizer path above
        let input_length = if self.skip_tokenizer_in_tgi {
            inputs.chars().filter(|&c| c == ',').count() + 1
        } else {
            truncate.unwrap_or(self.max_input_length)
        };

        // Validate MaxNewTokens
        if (input_length as u32 + max_new_tokens) > self.max_total_tokens as u32 {
            return Err(ValidationError::MaxNewTokens(
                self.max_total_tokens - self.max_input_length,
                max_new_tokens,
            ));
        }

        Ok((inputs, input_length, max_new_tokens))
    }
}
|
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Validate a payload and get the number of tokens in the input
///
/// Checks every user-supplied generation parameter against the configured
/// limits, resolves defaults, then delegates input measurement to
/// [`Validation::validate_input`]. Returns a fully-resolved
/// `ValidGenerateRequest` ready to be scheduled.
#[instrument(skip_all)]
pub(crate) async fn validate(
    &self,
    request: GenerateRequest,
) -> Result<ValidGenerateRequest, ValidationError> {
    let GenerateParameters {
        best_of,
        temperature,
        repetition_penalty,
        top_k,
        top_p,
        typical_p,
        do_sample,
        max_new_tokens,
        stop: stop_sequences,
        truncate,
        seed,
        watermark,
        decoder_input_details,
        top_n_tokens,
        ..
    } = request.parameters;

    // sampling must be true when best_of > 1
    // Setting any sampling-related parameter counts as enabling sampling
    let best_of = best_of.unwrap_or(1);
    let sampling = do_sample
        || temperature.is_some()
        || top_k.is_some()
        || top_p.is_some()
        || typical_p.is_some();

    if best_of > 1 && !sampling {
        return Err(BestOfSampling);
    }

    // Temperature defaults to 1.0 (no rescaling) and must be > 0
    let temperature = temperature.unwrap_or(1.0);
    if temperature <= 0.0 {
        return Err(ValidationError::Temperature);
    }

    // Repetition penalty defaults to 1.0 (no penalty) and must be > 0
    let repetition_penalty = repetition_penalty.unwrap_or(1.0);
    if repetition_penalty <= 0.0 {
        return Err(ValidationError::RepetitionPenalty);
    }

    // TODO: enable watermark with fp8 quantization
    // QUANT_CONFIG being set (non-empty) signals quantization is active
    let quantization_enabled = env::var("QUANT_CONFIG")
        .ok()
        .map_or(false, |value| !value.is_empty());
    if watermark && quantization_enabled {
        return Err(ValidationError::WatermarkWithQuantization)
    }

    // Different because the proto default value is not a valid value
    // for the user
    let top_p = top_p
        .map(|value| {
            if value <= 0.0 || value >= 1.0 {
                return Err(ValidationError::TopP);
            }
            Ok(value)
        })
        .unwrap_or(Ok(1.0))?;

    let typical_p = typical_p
        .map(|value| {
            if value <= 0.0 || value >= 1.0 {
                return Err(ValidationError::TypicalP);
            }
            Ok(value)
        })
        .unwrap_or(Ok(0))
        .unwrap_or(Ok(1.0))?;

    // 0 is the proto sentinel for "disabled"; users must pass a positive value
    let top_k: u32 = top_k
        .map(|value| {
            if value <= 0 {
                return Err(ValidationError::TopK);
            }
            Ok(value as u32)
        })
        .unwrap_or(Ok(0))?;

    // `max_new_tokens == 0` is rejected; `None` is resolved later in
    // `validate_input`
    if max_new_tokens == Some(0) {
        return Err(ValidationError::NegativeMaxNewTokens);
    }

    if stop_sequences.len() > self.max_stop_sequences {
        return Err(ValidationError::StopSequence(
            self.max_stop_sequences,
            stop_sequences.len(),
        ));
    }

    // If seed is None, assign a random one
    // An explicit seed contradicts `best_of` (every candidate would be equal)
    let seed = match seed {
        None => thread_rng().gen(),
        Some(seed) => {
            if best_of > 1 {
                return Err(BestOfSeed);
            }
            seed
        }
    };

    // `top_n_tokens` defaults to 0 (disabled) and is capped by the server
    let top_n_tokens = top_n_tokens
        .map(|value| {
            if value > self.max_top_n_tokens {
                return Err(ValidationError::TopNTokens(self.max_top_n_tokens, value));
            }
            Ok(value)
        })
        .unwrap_or(Ok(0))?;

    // Check if inputs is empty
    if request.inputs.is_empty() {
        return Err(EmptyInput);
    }

    // Check if truncate is strictly positive and less than max_input_length
    let truncate = truncate
        .map(|value| {
            if value == 0 || value > self.max_input_length {
                return Err(ValidationError::Truncate(self.max_input_length, value));
            }
            Ok(Some(value))
        })
        .unwrap_or(Ok(None))?;

    // Validate inputs
    let (inputs, input_length, max_new_tokens) = self
        .validate_input(request.inputs, truncate, max_new_tokens)
        .await?;

    let parameters = NextTokenChooserParameters {
        temperature,
        repetition_penalty,
        top_k,
        top_p,
        typical_p,
        do_sample,
        seed,
        watermark,
    };
    let stopping_parameters = StoppingCriteriaParameters {
        max_new_tokens,
        stop_sequences,
        ignore_eos_token: false,
    };

    metrics::histogram!("tgi_request_max_new_tokens", max_new_tokens as f64);

    Ok(ValidGenerateRequest {
        inputs,
        decoder_input_details,
        input_length: input_length as u32,
        truncate: truncate.unwrap_or(self.max_input_length) as u32,
        parameters,
        stopping_parameters,
        top_n_tokens,
    })
}
|
2023-03-09 14:30:54 +00:00
|
|
|
|
|
|
|
/// Validate the best_of parameter
|
|
|
|
#[instrument(skip_all)]
|
|
|
|
pub(crate) fn validate_best_of(&self, best_of: usize) -> Result<usize, ValidationError> {
|
|
|
|
if self.max_best_of == 1 && best_of != 1 {
|
|
|
|
return Err(ValidationError::BestOfDisabled);
|
|
|
|
}
|
|
|
|
|
|
|
|
if best_of > self.max_best_of {
|
|
|
|
return Err(ValidationError::BestOf(self.max_best_of, best_of));
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(best_of)
|
|
|
|
}
|
2022-10-11 14:50:54 +00:00
|
|
|
}
|
|
|
|
|
2023-10-23 13:51:12 +00:00
|
|
|
/// Round robin tokenization task
|
|
|
|
async fn round_robin_task(
|
|
|
|
mut receiver: mpsc::UnboundedReceiver<TokenizerRequest>,
|
|
|
|
senders: Vec<mpsc::UnboundedSender<TokenizerRequest>>,
|
|
|
|
) {
|
|
|
|
loop {
|
|
|
|
for sender in &senders {
|
|
|
|
match receiver.recv().await {
|
|
|
|
None => return,
|
|
|
|
Some(request) => sender.send(request).unwrap(),
|
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-09 18:22:27 +00:00
|
|
|
/// Start tokenization workers
|
2023-10-23 13:51:12 +00:00
|
|
|
fn tokenizer_worker(tokenizer: Tokenizer, mut receiver: mpsc::UnboundedReceiver<TokenizerRequest>) {
|
2022-10-18 13:19:03 +00:00
|
|
|
// Loop over requests
|
2023-10-23 13:51:12 +00:00
|
|
|
while let Some(((inputs, truncate), response_tx, parent_span)) = receiver.blocking_recv() {
|
2023-02-13 12:02:45 +00:00
|
|
|
parent_span.in_scope(|| {
|
|
|
|
response_tx
|
2023-04-09 18:22:27 +00:00
|
|
|
.send(prepare_input(inputs, truncate, &tokenizer))
|
2023-02-13 12:02:45 +00:00
|
|
|
.unwrap_or(())
|
|
|
|
})
|
2023-01-03 09:41:22 +00:00
|
|
|
}
|
|
|
|
}
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2023-04-09 18:22:27 +00:00
|
|
|
/// Get input length and optionally truncate it
|
|
|
|
fn prepare_input(
|
|
|
|
inputs: String,
|
|
|
|
truncate: Option<usize>,
|
2023-01-03 09:41:22 +00:00
|
|
|
tokenizer: &Tokenizer,
|
2023-04-09 18:22:27 +00:00
|
|
|
) -> Result<(String, usize), ValidationError> {
|
2023-03-09 12:10:30 +00:00
|
|
|
// Get the number of tokens in the input
|
|
|
|
let mut encoding = tokenizer
|
2023-04-09 18:22:27 +00:00
|
|
|
.encode(inputs.clone(), true)
|
2023-03-09 12:10:30 +00:00
|
|
|
.map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
|
|
|
|
|
2023-04-09 18:22:27 +00:00
|
|
|
// Optionally truncate
|
|
|
|
let (inputs, input_length) = match truncate {
|
2023-04-17 14:51:53 +00:00
|
|
|
// Truncate is some and < encoding length
|
|
|
|
Some(truncate) if truncate < encoding.len() => {
|
2023-04-09 18:22:27 +00:00
|
|
|
// truncate encoding and decode new inputs
|
|
|
|
encoding.truncate(truncate, 0, TruncationDirection::Left);
|
|
|
|
let inputs = tokenizer
|
2023-08-14 17:26:19 +00:00
|
|
|
.decode(encoding.get_ids(), false)
|
2023-04-09 18:22:27 +00:00
|
|
|
.map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
|
|
|
|
(inputs, encoding.len())
|
|
|
|
}
|
|
|
|
// Nothing to do
|
|
|
|
_ => (inputs, encoding.len()),
|
2023-03-09 12:10:30 +00:00
|
|
|
};
|
|
|
|
|
2023-04-09 18:22:27 +00:00
|
|
|
Ok((inputs, input_length))
|
2022-10-11 14:50:54 +00:00
|
|
|
}
|
2022-10-18 13:19:03 +00:00
|
|
|
|
2023-04-09 18:22:27 +00:00
|
|
|
/// Message sent to a tokenizer worker:
/// `((inputs, truncate), response channel, caller's tracing span)`.
type TokenizerRequest = (
    (String, Option<usize>),
    oneshot::Sender<Result<(String, usize), ValidationError>>,
    Span,
);
|
|
|
|
|
2023-01-31 16:04:00 +00:00
|
|
|
/// A generation request whose parameters have all been validated and resolved
/// by [`Validation::validate`].
#[derive(Debug)]
pub(crate) struct ValidGenerateRequest {
    // Possibly-truncated input text
    pub inputs: String,
    // Resolved number of input tokens
    pub input_length: u32,
    // Effective truncation limit (defaults to `max_input_length`)
    pub truncate: u32,
    pub decoder_input_details: bool,
    pub parameters: NextTokenChooserParameters,
    pub stopping_parameters: StoppingCriteriaParameters,
    pub top_n_tokens: u32,
}
|
|
|
|
|
2022-10-18 13:19:03 +00:00
|
|
|
/// Errors returned when a generation payload fails validation; the `#[error]`
/// messages are surfaced directly to API clients.
#[derive(Error, Debug)]
pub enum ValidationError {
    #[error("`best_of` must be > 0 and <= {0}. Given: {1}")]
    BestOf(usize, usize),
    #[error("`best_of` != 1 is not allowed for this endpoint")]
    BestOfDisabled,
    #[error("you must use sampling when `best_of` is > 1")]
    BestOfSampling,
    #[error("`seed` must not be set when `best_of` > 1")]
    BestOfSeed,
    #[error("`best_of` != 1 is not supported when streaming tokens")]
    BestOfStream,
    #[error("`top_n_tokens` must be >= 0 and <= {0}. Given: {1}")]
    TopNTokens(u32, u32),
    #[error("`top_n_tokens` != 0 is not allowed for this endpoint")]
    TopNTokensDisabled,
    #[error("`decoder_input_details` == true is not supported when streaming tokens")]
    PrefillDetailsStream,
    #[error("`temperature` must be strictly positive")]
    Temperature,
    #[error("`repetition_penalty` must be strictly positive")]
    RepetitionPenalty,
    #[error("`top_p` must be > 0.0 and < 1.0")]
    TopP,
    #[error("`top_k` must be strictly positive")]
    TopK,
    #[error("`truncate` must be strictly positive and less than {0}. Given: {1}")]
    Truncate(usize, usize),
    #[error("`typical_p` must be > 0.0 and < 1.0")]
    TypicalP,
    #[error("one of `max_new_tokens` or `truncate` must be set if a fast tokenizer is not in use")]
    UnsetMaxNewTokens,
    #[error("`max_new_tokens` must be strictly positive")]
    NegativeMaxNewTokens,
    #[error("`max_new_tokens` must be <= {0}. Given: {1}")]
    MaxNewTokens(usize, u32),
    #[error("`inputs` tokens + `max_new_tokens` must be <= {0}. Given: {1} `inputs` tokens and {2} `max_new_tokens`")]
    MaxTotalTokens(usize, usize, u32),
    #[error("`inputs` must have less than {0} tokens. Given: {1}")]
    InputLength(usize, usize),
    #[error("`inputs` cannot be empty")]
    EmptyInput,
    #[error("`stop` supports up to {0} stop sequences. Given: {1}")]
    StopSequence(usize, usize),
    #[error("tokenizer error {0}")]
    Tokenizer(String),
    #[error("`watermark` = true is not allowed with FP8 quantization.")]
    WatermarkWithQuantization,
}
|
2023-04-25 12:13:14 +00:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::default_parameters;
    use crate::tests::get_tokenizer;

    /// Shared fixture: every test below uses the same limits
    /// (1 worker, best_of <= 2, 3 stop sequences, top_n <= 4,
    /// 5 input tokens, 6 total tokens).
    fn build_validation(tokenizer: Option<Tokenizer>) -> Validation {
        let workers = 1;
        let max_best_of = 2;
        let max_stop_sequences = 3;
        let max_top_n_tokens = 4;
        let max_input_length = 5;
        let max_total_tokens = 6;
        Validation::new(
            workers,
            tokenizer,
            max_best_of,
            max_stop_sequences,
            max_top_n_tokens,
            max_input_length,
            max_total_tokens,
        )
    }

    #[tokio::test]
    async fn test_validation_max_new_tokens() {
        // No tokenizer: `validate_input` takes the truncate-based fallback path
        let validation = build_validation(None);

        let max_new_tokens = 10;
        match validation
            .validate_input("Hello".to_string(), None, Some(max_new_tokens))
            .await
        {
            Err(ValidationError::MaxNewTokens(1, 10)) => (),
            _ => panic!("Unexpected not max new tokens"),
        }
    }

    #[tokio::test]
    async fn test_validation_input_length() {
        let validation = build_validation(Some(get_tokenizer().await));

        let max_new_tokens = 10;
        match validation
            .validate_input("Hello".to_string(), None, Some(max_new_tokens))
            .await
        {
            Err(ValidationError::MaxTotalTokens(6, 1, 10)) => (),
            _ => panic!("Unexpected not max new tokens"),
        }
    }

    #[tokio::test]
    async fn test_validation_best_of_sampling() {
        let validation = build_validation(Some(get_tokenizer().await));
        // best_of > 1 without sampling must be rejected
        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    best_of: Some(2),
                    do_sample: false,
                    ..default_parameters()
                },
            })
            .await
        {
            Err(ValidationError::BestOfSampling) => (),
            _ => panic!("Unexpected not best of sampling"),
        }
    }

    #[tokio::test]
    async fn test_validation_top_p() {
        let validation = build_validation(Some(get_tokenizer().await));

        // top_p == 1.0 is rejected when set explicitly by the user
        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_p: Some(1.0),
                    ..default_parameters()
                },
            })
            .await
        {
            Err(ValidationError::TopP) => (),
            _ => panic!("Unexpected top_p"),
        }

        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_p: Some(0.99),
                    ..default_parameters()
                },
            })
            .await
        {
            Ok(_) => (),
            _ => panic!("Unexpected top_p error"),
        }

        let valid_request = validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_p: None,
                    ..default_parameters()
                },
            })
            .await
            .unwrap();
        // top_p == 1.0 is invalid for users to ask for but it's the default resolved value.
        assert_eq!(valid_request.parameters.top_p, 1.0);
    }

    #[tokio::test]
    async fn test_validation_top_n_tokens() {
        let validation = build_validation(Some(get_tokenizer().await));

        // Above the configured maximum: rejected
        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
        {
            Err(ValidationError::TopNTokens(4, 5)) => (),
            _ => panic!("Unexpected top_n_tokens"),
        }

        // At the maximum: accepted
        validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: Some(4),
                    ..default_parameters()
                },
            })
            .await
            .unwrap();

        // Zero (disabled): accepted
        validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: Some(0),
                    ..default_parameters()
                },
            })
            .await
            .unwrap();

        // Unset resolves to the disabled value 0
        let valid_request = validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: None,
                    ..default_parameters()
                },
            })
            .await
            .unwrap();

        assert_eq!(valid_request.top_n_tokens, 0);
    }
}
|