text-generation-inference/router/src/validation.rs

175 lines
5.2 KiB
Rust
Raw Normal View History

2022-10-18 13:19:03 +00:00
/// Payload validation logic
2022-10-27 12:25:29 +00:00
use crate::{ErrorResponse, GenerateRequest};
2022-10-17 12:59:00 +00:00
use axum::http::StatusCode;
2022-10-27 12:25:29 +00:00
use axum::Json;
2022-10-17 12:59:00 +00:00
use thiserror::Error;
use tokenizers::tokenizer::Tokenizer;
use tokio::sync::{mpsc, oneshot};
/// Largest `max_new_tokens` value a request may ask for; larger values are rejected.
const MAX_MAX_NEW_TOKENS: u32 = 512;
/// Largest number of stop sequences a request may carry; longer lists are rejected.
const MAX_STOP_SEQUENCES: usize = 4;
2022-10-18 13:19:03 +00:00
/// Validation
#[derive(Debug, Clone)]
2022-10-17 16:27:33 +00:00
pub struct Validation {
2022-10-18 13:19:03 +00:00
/// Channel to communicate with the background validation task
sender: mpsc::Sender<ValidationRequest>,
}
impl Validation {
2022-10-18 13:19:03 +00:00
pub(crate) fn new(workers: usize, tokenizer: Tokenizer, max_input_length: usize) -> Self {
// Crate channel
let (validation_sender, validation_receiver) = mpsc::channel(128);
2022-10-18 13:19:03 +00:00
// Launch background validation task
tokio::spawn(validation_task(
workers,
tokenizer,
max_input_length,
validation_receiver,
));
Self {
sender: validation_sender,
}
}
2022-10-18 13:19:03 +00:00
/// Validate a payload and get the number of tokens in the input
pub(crate) async fn validate(
&self,
request: GenerateRequest,
) -> Result<(usize, GenerateRequest), ValidationError> {
2022-10-18 13:19:03 +00:00
// Create response channel
let (sender, receiver) = oneshot::channel();
2022-10-18 13:19:03 +00:00
// Send request to the background validation task
// Unwrap is safe here
self.sender.send((request, sender)).await.unwrap();
2022-10-18 13:19:03 +00:00
// Await on response channel
// Unwrap is safe here
receiver.await.unwrap()
}
}
2022-10-18 13:19:03 +00:00
/// Validation task
/// Load balance the validation requests between multiple validation workers
async fn validation_task(
workers: usize,
tokenizer: Tokenizer,
max_input_length: usize,
mut receiver: mpsc::Receiver<ValidationRequest>,
) {
let mut workers_senders = Vec::with_capacity(workers);
// Create workers
for _ in 0..workers {
let tokenizer_clone: Tokenizer = tokenizer.clone().into();
2022-10-18 13:19:03 +00:00
// Create channel to communicate with worker
let (worker_sender, worker_receiver) = mpsc::channel(workers);
workers_senders.push(worker_sender);
// Spawn worker
tokio::task::spawn_blocking(move || {
validation_worker(tokenizer_clone, max_input_length, worker_receiver)
});
}
loop {
// Load balance requests between workers
for sender in workers_senders.iter() {
if let Some(validation_request) = receiver.recv().await {
sender.send(validation_request).await.unwrap();
} else {
return;
}
}
}
}
/// Blocking worker loop: validate every request received on `receiver`.
///
/// For each request, checks the parameters inside the payload and gets the number of tokens
/// inside the input using the tokenizer (see `validate`), then sends the outcome back on the
/// request's response channel. The loop ends when `receiver` yields `None`.
///
/// Uses `blocking_recv`, so it is meant to run outside the async context; it is started
/// through `spawn_blocking`.
fn validation_worker(
    tokenizer: Tokenizer,
    max_input_length: usize,
    mut receiver: mpsc::Receiver<ValidationRequest>,
) {
    // Loop over requests
    while let Some((request, response_tx)) = receiver.blocking_recv() {
        let outcome = validate(request, &tokenizer, max_input_length);
        // A failed send is deliberately ignored: it means the response could not be delivered,
        // and there is nothing more the worker can do with it.
        let _ = response_tx.send(outcome);
    }
}
/// Check the parameters inside the payload and count the tokens of the input.
///
/// Checks are applied in this order, returning on the first failure:
///
/// 1. `temperature` must be strictly positive (NaN is rejected).
/// 2. `top_p` must be in `(0.0, 1.0]` (NaN is rejected).
/// 3. `top_k` must not be negative.
/// 4. `max_new_tokens` must not exceed `MAX_MAX_NEW_TOKENS`.
/// 5. At most `MAX_STOP_SEQUENCES` stop sequences may be given.
/// 6. The tokenized input must not be longer than `max_input_length` tokens.
///
/// On success returns the input length in tokens together with the unchanged request.
///
/// # Errors
///
/// Returns the [`ValidationError`] variant for the first failed check, or
/// [`ValidationError::Tokenizer`] if the tokenizer fails to encode the input.
fn validate(
    request: GenerateRequest,
    tokenizer: &Tokenizer,
    max_input_length: usize,
) -> Result<(usize, GenerateRequest), ValidationError> {
    let parameters = &request.parameters;

    // Every comparison with NaN is false, so a plain range check would let NaN through;
    // reject it explicitly.
    if parameters.temperature.is_nan() || parameters.temperature <= 0.0 {
        return Err(ValidationError::Temperature);
    }
    if parameters.top_p.is_nan() || parameters.top_p <= 0.0 || parameters.top_p > 1.0 {
        return Err(ValidationError::TopP);
    }
    if parameters.top_k < 0 {
        return Err(ValidationError::TopK);
    }
    if parameters.max_new_tokens > MAX_MAX_NEW_TOKENS {
        return Err(ValidationError::MaxNewTokens(MAX_MAX_NEW_TOKENS));
    }
    let stop_sequences = parameters.stop.len();
    if stop_sequences > MAX_STOP_SEQUENCES {
        return Err(ValidationError::StopSequence(
            MAX_STOP_SEQUENCES,
            stop_sequences,
        ));
    }

    // Get the number of tokens in the input.
    // The input is borrowed for tokenization rather than cloned, so it is not copied.
    let input_length = tokenizer
        .encode(request.inputs.as_str(), true)
        .map_err(|err| ValidationError::Tokenizer(err.to_string()))?
        .len();

    if input_length > max_input_length {
        return Err(ValidationError::InputLength(
            input_length,
            max_input_length,
        ));
    }

    Ok((input_length, request))
}
2022-10-18 13:19:03 +00:00
/// Message sent to the validation task and its workers: the request to validate, paired with
/// the one-shot sender on which the validation outcome is returned.
type ValidationRequest = (
GenerateRequest,
oneshot::Sender<Result<(usize, GenerateRequest), ValidationError>>,
);
#[derive(Error, Debug)]
pub enum ValidationError {
2022-10-27 12:25:29 +00:00
#[error("temperature must be strictly positive")]
2022-10-18 13:19:03 +00:00
Temperature,
#[error("top_p must be > 0.0 and <= 1.0")]
2022-10-18 13:19:03 +00:00
TopP,
2022-10-27 12:25:29 +00:00
#[error("top_k must be strictly positive")]
2022-10-18 13:19:03 +00:00
TopK,
#[error("max_new_tokens must be <= {0}")]
MaxNewTokens(u32),
2022-10-27 12:25:29 +00:00
#[error("inputs must have less than {1} tokens. Given: {0}")]
2022-10-21 08:59:15 +00:00
InputLength(usize, usize),
#[error("stop supports up to {0} stop sequences. Given: {1}")]
StopSequence(usize, usize),
2022-11-14 16:15:19 +00:00
#[error("tokenizer error {0}")]
Tokenizer(String),
2022-10-18 13:19:03 +00:00
}
2022-10-27 12:25:29 +00:00
impl From<ValidationError> for (StatusCode, Json<ErrorResponse>) {
2022-10-18 13:19:03 +00:00
fn from(err: ValidationError) -> Self {
2022-10-27 12:25:29 +00:00
(
2022-11-14 13:34:15 +00:00
StatusCode::UNPROCESSABLE_ENTITY,
2022-10-27 12:25:29 +00:00
Json(ErrorResponse {
error: err.to_string(),
}),
)
2022-10-18 13:19:03 +00:00
}
}