2022-10-11 14:50:54 +00:00
|
|
|
use crate::server::GenerateRequest;
|
2022-10-17 12:59:00 +00:00
|
|
|
use axum::http::StatusCode;
|
|
|
|
use thiserror::Error;
|
2022-10-11 14:50:54 +00:00
|
|
|
use tokenizers::tokenizer::Tokenizer;
|
|
|
|
use tokio::sync::{mpsc, oneshot};
|
|
|
|
|
2022-10-17 12:59:00 +00:00
|
|
|
#[derive(Error, Debug)]
|
|
|
|
pub enum ValidationError {
|
|
|
|
#[error("Temperature must be strictly positive")]
|
|
|
|
Temperature,
|
|
|
|
#[error("Top p must be <= 0.0 or > 1.0")]
|
|
|
|
TopP,
|
|
|
|
#[error("Top k must be strictly positive")]
|
|
|
|
TopK,
|
|
|
|
#[error("Max New Tokens must be < 512")]
|
|
|
|
MaxNewTokens,
|
|
|
|
#[error("Inputs must have less than 512 tokens. Given: {0}")]
|
|
|
|
InputLength(usize),
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<ValidationError> for (StatusCode, String) {
|
|
|
|
fn from(err: ValidationError) -> Self {
|
|
|
|
(StatusCode::BAD_REQUEST, err.to_string())
|
|
|
|
}
|
|
|
|
}
|
2022-10-11 14:50:54 +00:00
|
|
|
|
|
|
|
type ValidationRequest = (
|
|
|
|
GenerateRequest,
|
|
|
|
oneshot::Sender<Result<(usize, GenerateRequest), ValidationError>>,
|
|
|
|
);
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub(crate) struct Validation {
|
|
|
|
sender: mpsc::Sender<ValidationRequest>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Validation {
|
|
|
|
pub(crate) fn new(tokenizer: Tokenizer) -> Self {
|
|
|
|
let (validation_sender, validation_receiver) = mpsc::channel(128);
|
|
|
|
|
|
|
|
tokio::spawn(validation_task(tokenizer, validation_receiver));
|
|
|
|
|
|
|
|
Self {
|
|
|
|
sender: validation_sender,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) async fn validate(
|
|
|
|
&self,
|
|
|
|
request: GenerateRequest,
|
|
|
|
) -> Result<(usize, GenerateRequest), ValidationError> {
|
|
|
|
let (sender, receiver) = oneshot::channel();
|
|
|
|
self.sender.send((request, sender)).await.unwrap();
|
|
|
|
receiver.await.unwrap()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn validation_task(tokenizer: Tokenizer, mut receiver: mpsc::Receiver<ValidationRequest>) {
|
|
|
|
while let Some((request, response_tx)) = receiver.recv().await {
|
|
|
|
if request.parameters.temperature < 0.0 {
|
2022-10-17 12:59:00 +00:00
|
|
|
response_tx
|
|
|
|
.send(Err(ValidationError::Temperature))
|
|
|
|
.unwrap_or(());
|
2022-10-11 14:50:54 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if request.parameters.top_p <= 0.0 || request.parameters.top_p > 1.0 {
|
2022-10-17 12:59:00 +00:00
|
|
|
response_tx.send(Err(ValidationError::TopP)).unwrap_or(());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if request.parameters.top_k < 0 {
|
|
|
|
response_tx.send(Err(ValidationError::TopK)).unwrap_or(());
|
2022-10-11 14:50:54 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if request.parameters.max_new_tokens > 512 {
|
2022-10-17 12:59:00 +00:00
|
|
|
response_tx
|
|
|
|
.send(Err(ValidationError::MaxNewTokens))
|
|
|
|
.unwrap_or(());
|
2022-10-11 14:50:54 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
let inputs = tokenizer.encode(request.inputs.clone(), false).unwrap();
|
|
|
|
let input_length = inputs.len();
|
|
|
|
|
|
|
|
if input_length > 512 {
|
2022-10-17 12:59:00 +00:00
|
|
|
response_tx
|
|
|
|
.send(Err(ValidationError::InputLength(input_length)))
|
|
|
|
.unwrap_or(());
|
2022-10-11 14:50:54 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
response_tx.send(Ok((input_length, request))).unwrap_or(());
|
|
|
|
}
|
|
|
|
}
|