/// Payload validation logic
use crate::validation::ValidationError::{BestOfSampling, BestOfSeed, EmptyInput};
use crate::{GenerateParameters, GenerateRequest, GrammarType};
use rand::{thread_rng, Rng};
use text_generation_client::{
    GrammarType as ProtoGrammarType, NextTokenChooserParameters, StoppingCriteriaParameters,
};
use thiserror::Error;
use tokenizers::tokenizer::Tokenizer;
use tokenizers::TruncationDirection;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tracing::{instrument, Span};

/// Validation
#[derive(Debug, Clone)]
pub struct Validation {
    /// Validation parameters
    max_best_of: usize,
    max_stop_sequences: usize,
    max_top_n_tokens: u32,
    max_input_length: usize,
    max_total_tokens: usize,
    disable_grammar_support: bool,
    /// Channel to communicate with the background tokenization task
    sender: Option<mpsc::UnboundedSender<TokenizerRequest>>,
}

impl Validation {
    pub(crate) fn new(
        workers: usize,
        tokenizer: Option<Tokenizer>,
        max_best_of: usize,
        max_stop_sequences: usize,
        max_top_n_tokens: u32,
        max_input_length: usize,
        max_total_tokens: usize,
        disable_grammar_support: bool,
    ) -> Self {
        // If we have a fast tokenizer
        let sender = if let Some(tokenizer) = tokenizer {
            // Create round robin channel
            let (validation_sender, validation_round_robin_receiver) = mpsc::unbounded_channel();
            let mut senders = Vec::with_capacity(workers);

            // Create workers
            for _ in 0..workers {
                let tokenizer_clone = tokenizer.clone();
                let (tokenizer_sender, tokenizer_receiver) = mpsc::unbounded_channel();
                senders.push(tokenizer_sender);

                // Spawn worker
                tokio::task::spawn_blocking(move || {
                    tokenizer_worker(tokenizer_clone, tokenizer_receiver)
                });
            }

            // Create tokenization round robin task
            tokio::spawn(round_robin_task(validation_round_robin_receiver, senders));

            Some(validation_sender)
        } else {
            None
        };

        Self {
            max_best_of,
            sender,
            max_stop_sequences,
            max_top_n_tokens,
            max_input_length,
            max_total_tokens,
            disable_grammar_support,
        }
    }
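
    // A minimal construction sketch (added for illustration; the values are
    // hypothetical). With `tokenizer = None` no background tasks are spawned
    // and tokenizer-based validation is skipped downstream:
    //
    //     let validation = Validation::new(
    //         /* workers */ 2,
    //         /* tokenizer */ None,
    //         /* max_best_of */ 2,
    //         /* max_stop_sequences */ 4,
    //         /* max_top_n_tokens */ 5,
    //         /* max_input_length */ 1024,
    //         /* max_total_tokens */ 2048,
    //         /* disable_grammar_support */ false,
    //     );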

    #[instrument(skip(self, inputs))]
    pub async fn tokenize(
        &self,
        inputs: String,
        truncate: Option<usize>,
    ) -> Result<Option<(tokenizers::Encoding, String)>, ValidationError> {
        // If we have a fast tokenizer
        if let Some(sender) = &self.sender {
            // Create response channel
            let (response_sender, response_receiver) = oneshot::channel();
            // Send request to the background validation task
            // Unwrap is safe here
            sender
                .send(((inputs, truncate), response_sender, Span::current()))
                .unwrap();

            // Await on response channel
            // Unwrap is safe here
            let encoding = response_receiver.await.unwrap()?;
            Ok(Some(encoding))
        } else {
            Ok(None)
        }
    }
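
    // Usage sketch (added; values hypothetical): `tokenize` resolves to
    // `Ok(None)` when the router was started without a fast tokenizer, so
    // callers must handle both branches:
    //
    //     if let Some((encoding, inputs)) = validation
    //         .tokenize("Hello world".to_string(), Some(512))
    //         .await?
    //     {
    //         println!("{} tokens", encoding.len());
    //     }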

    #[instrument(skip(self, inputs))]
    async fn validate_input(
        &self,
        inputs: String,
        truncate: Option<usize>,
        max_new_tokens: Option<u32>,
    ) -> Result<(String, usize, u32), ValidationError> {
        // If we have a fast tokenizer
        if let Some((encoding, inputs)) = self.tokenize(inputs.clone(), truncate).await? {
            // Get the number of tokens in the input
            let input_length = encoding.len();

            // Get total tokens
            let max_new_tokens: u32 = if let Some(max_new_tokens) = max_new_tokens {
                max_new_tokens
            } else {
                self.max_total_tokens.saturating_sub(input_length) as u32
            };
            let total_tokens = input_length + max_new_tokens as usize;

            // Validate MaxTotalTokens
            if total_tokens > self.max_total_tokens {
                return Err(ValidationError::MaxTotalTokens(
                    self.max_total_tokens,
                    input_length,
                    max_new_tokens,
                ));
            }

            // Validate InputLength
            if input_length > self.max_input_length {
                return Err(ValidationError::InputLength(
                    self.max_input_length,
                    input_length,
                ));
            }

            metrics::histogram!("tgi_request_input_length", input_length as f64);
            Ok((inputs, input_length, max_new_tokens))
        }
        // Return inputs without validation
        else {
            // In this case, we don't know the real length in tokens of the inputs
            // However, the inputs will be truncated by the Python servers
            // We make sure that truncate + max_new_tokens <= self.max_total_tokens
            let max_new_tokens: u32 = if let Some(max_new_tokens) = max_new_tokens {
                max_new_tokens
            } else if let Some(truncate) = truncate {
                self.max_total_tokens.saturating_sub(truncate) as u32
            } else {
                return Err(ValidationError::UnsetMaxNewTokens);
            };
            let input_length = truncate.unwrap_or(self.max_input_length);

            // Validate MaxNewTokens
            if (input_length as u32 + max_new_tokens) > self.max_total_tokens as u32 {
                return Err(ValidationError::MaxNewTokens(
                    self.max_total_tokens - self.max_input_length,
                    max_new_tokens,
                ));
            }

            Ok((inputs, input_length, max_new_tokens))
        }
    }
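
    // Worked example of the token budget above (added; numbers hypothetical):
    // with `max_total_tokens = 2048` and a prompt that tokenizes to 100
    // tokens, an unset `max_new_tokens` defaults to 2048 - 100 = 1948, so
    // `total_tokens` is exactly 2048 and passes the `MaxTotalTokens` check.
    // Asking for 2000 new tokens instead would fail with
    // `MaxTotalTokens(2048, 100, 2000)`.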

    /// Validate a payload and get the number of tokens in the input
    #[instrument(skip_all)]
    pub(crate) async fn validate(
        &self,
        request: GenerateRequest,
    ) -> Result<ValidGenerateRequest, ValidationError> {
        let GenerateParameters {
            best_of,
            temperature,
            repetition_penalty,
            frequency_penalty,
            top_k,
            top_p,
            typical_p,
            do_sample,
            max_new_tokens,
            stop: stop_sequences,
            truncate,
            seed,
            watermark,
            decoder_input_details,
            top_n_tokens,
            grammar,
            ..
        } = request.parameters;

        // sampling must be true when best_of > 1
        let best_of = best_of.unwrap_or(1);
        let sampling = do_sample
            || temperature.is_some()
            || top_k.is_some()
            || top_p.is_some()
            || typical_p.is_some();

        if best_of > 1 && !sampling {
            return Err(BestOfSampling);
        }

        let temperature = temperature.unwrap_or(1.0);
        if temperature <= 0.0 {
            return Err(ValidationError::Temperature);
        }

        let repetition_penalty = repetition_penalty.unwrap_or(1.0);
        if repetition_penalty <= 0.0 {
            return Err(ValidationError::RepetitionPenalty);
        }

        let frequency_penalty = frequency_penalty.unwrap_or(0.0);
        if !(-2.0..=2.0).contains(&frequency_penalty) {
            return Err(ValidationError::FrequencyPenalty);
        }

        // Different because the proto default value is not a valid value
        // for the user
        let top_p = top_p
            .map(|value| {
                if value <= 0.0 || value >= 1.0 {
                    return Err(ValidationError::TopP);
                }
                Ok(value)
            })
            .unwrap_or(Ok(1.0))?;

        let typical_p = typical_p
            .map(|value| {
                if value <= 0.0 || value >= 1.0 {
                    return Err(ValidationError::TypicalP);
                }
                Ok(value)
            })
            .unwrap_or(Ok(1.0))?;

        let top_k: u32 = top_k
            .map(|value| {
                if value <= 0 {
                    return Err(ValidationError::TopK);
                }
                Ok(value as u32)
            })
            .unwrap_or(Ok(0))?;

        if max_new_tokens == Some(0) {
            return Err(ValidationError::NegativeMaxNewTokens);
        }

        if stop_sequences.len() > self.max_stop_sequences {
            return Err(ValidationError::StopSequence(
                self.max_stop_sequences,
                stop_sequences.len(),
            ));
        }

        // If seed is None, assign a random one
        let seed = match seed {
            None => thread_rng().gen(),
            Some(seed) => {
                if best_of > 1 {
                    return Err(BestOfSeed);
                }
                seed
            }
        };

        let top_n_tokens = top_n_tokens
            .map(|value| {
                if value > self.max_top_n_tokens {
                    return Err(ValidationError::TopNTokens(self.max_top_n_tokens, value));
                }
                Ok(value)
            })
            .unwrap_or(Ok(0))?;

        // Check if inputs is empty
        if request.inputs.is_empty() {
            return Err(EmptyInput);
        }

        // Check if truncate is strictly positive and less than max_input_length
        let truncate = truncate
            .map(|value| {
                if value == 0 || value > self.max_input_length {
                    return Err(ValidationError::Truncate(self.max_input_length, value));
                }
                Ok(Some(value))
            })
            .unwrap_or(Ok(None))?;

        // Validate inputs
        let (inputs, input_length, max_new_tokens) = self
            .validate_input(request.inputs, truncate, max_new_tokens)
            .await?;

        // TODO: we should build the FSM here and pass the compiled FSM instead of the grammar
        // NOTE: this is currently difficult because we need the tokenizer in Python to build
        // the FSM and we'd have to load a copy of the tokenizer into our Pyo3 instance which
        // may be slow and memory intensive. Best case is to have a Rust implementation of the FSM
        // compiler and use that to build the FSM here.

        // Validate grammar and unpack the grammar and type for the proto message
        let (grammar, grammar_type) = match grammar {
            Some(grammar) => {
                // Ensure that grammar is not set if it's not supported
                if self.disable_grammar_support {
                    return Err(ValidationError::Grammar);
                }
                match grammar {
                    // currently both are handled the same way since compilation is done in Python
                    GrammarType::Json(json) => (json, ProtoGrammarType::Json.into()),
                    GrammarType::Regex(regex) => (regex, ProtoGrammarType::Regex.into()),
                }
            }
            None => (String::new(), ProtoGrammarType::None.into()),
        };

        let parameters = NextTokenChooserParameters {
            temperature,
            repetition_penalty,
            frequency_penalty,
            top_k,
            top_p,
            typical_p,
            do_sample,
            seed,
            watermark,
            grammar,
            grammar_type,
        };
        let stopping_parameters = StoppingCriteriaParameters {
            max_new_tokens,
            stop_sequences,
            ignore_eos_token: false,
        };

        metrics::histogram!("tgi_request_max_new_tokens", max_new_tokens as f64);

        Ok(ValidGenerateRequest {
            inputs,
            decoder_input_details,
            input_length: input_length as u32,
            truncate: truncate.unwrap_or(self.max_input_length) as u32,
            parameters,
            stopping_parameters,
            top_n_tokens,
        })
    }
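
    // Defaults resolved by `validate` when parameters are unset (a summary of
    // the code above, added for reference): `best_of` -> 1, `temperature` ->
    // 1.0, `repetition_penalty` -> 1.0, `frequency_penalty` -> 0.0,
    // `top_p` -> 1.0, `typical_p` -> 1.0, `top_k` -> 0, `seed` -> random,
    // `top_n_tokens` -> 0, and an unset `grammar` becomes an empty string
    // with `ProtoGrammarType::None`.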

    /// Validate the best_of parameter
    #[instrument(skip_all)]
    pub(crate) fn validate_best_of(&self, best_of: usize) -> Result<usize, ValidationError> {
        if self.max_best_of == 1 && best_of != 1 {
            return Err(ValidationError::BestOfDisabled);
        }

        if best_of > self.max_best_of {
            return Err(ValidationError::BestOf(self.max_best_of, best_of));
        }

        Ok(best_of)
    }
}

/// Round robin tokenization task
async fn round_robin_task(
    mut receiver: mpsc::UnboundedReceiver<TokenizerRequest>,
    senders: Vec<mpsc::UnboundedSender<TokenizerRequest>>,
) {
    loop {
        for sender in &senders {
            match receiver.recv().await {
                None => return,
                Some(request) => sender.send(request).unwrap(),
            };
        }
    }
}
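
// Design note (added): this task is the only consumer of the shared request
// channel and forwards each request to the worker senders in turn, so load is
// spread evenly across the blocking tokenizer threads without any locking.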

/// Tokenization worker loop (one per blocking worker thread)
fn tokenizer_worker(tokenizer: Tokenizer, mut receiver: mpsc::UnboundedReceiver<TokenizerRequest>) {
    // Loop over requests
    while let Some(((inputs, truncate), response_tx, parent_span)) = receiver.blocking_recv() {
        parent_span.in_scope(|| {
            response_tx
                .send(prepare_input(inputs, truncate, &tokenizer))
                .unwrap_or(())
        })
    }
}
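
// Design note (added): tokenization is CPU-bound, so each worker runs on a
// dedicated blocking thread (spawned with `tokio::task::spawn_blocking` in
// `Validation::new`) and pulls requests with `blocking_recv` instead of
// awaiting, keeping the async executor threads free for request handling.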

/// Get input length and optionally truncate it
fn prepare_input(
    mut inputs: String,
    truncate: Option<usize>,
    tokenizer: &Tokenizer,
) -> Result<(tokenizers::Encoding, String), ValidationError> {
    // Get the number of tokens in the input
    let mut encoding = tokenizer
        .encode(inputs.clone(), true)
        .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;

    // Optionally truncate
    if let Some(truncate) = truncate {
        if truncate < encoding.len() {
            encoding.truncate(truncate, 0, TruncationDirection::Left);
            inputs = tokenizer
                .decode(encoding.get_ids(), false)
                .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
        }
    }

    Ok((encoding, inputs))
}
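
// Worked example (added; ids hypothetical): with `truncate = Some(2)` and a
// prompt that encodes to the ids [10, 11, 12, 13],
// `TruncationDirection::Left` keeps the *last* two tokens, so the encoding
// becomes [12, 13] and `inputs` is re-decoded from those ids; the oldest part
// of the prompt is dropped, presumably to preserve the most recent context.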

type TokenizerRequest = (
    (String, Option<usize>),
    oneshot::Sender<Result<(tokenizers::Encoding, String), ValidationError>>,
    Span,
);
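
// Field guide (added): a `TokenizerRequest` bundles the `(inputs, truncate)`
// payload, a oneshot sender for the `(Encoding, String)` result, and the
// request's tracing `Span` so worker-side events stay attached to it.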

#[derive(Debug, Clone)]
pub(crate) struct ValidGenerateRequest {
    pub inputs: String,
    pub input_length: u32,
    pub truncate: u32,
    pub decoder_input_details: bool,
    pub parameters: NextTokenChooserParameters,
    pub stopping_parameters: StoppingCriteriaParameters,
    pub top_n_tokens: u32,
}
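
// Note (added): `ValidGenerateRequest` is the fully-resolved form of a user
// request: every optional parameter has been defaulted or bounds-checked by
// `validate`, so downstream consumers of `NextTokenChooserParameters` and
// `StoppingCriteriaParameters` should not need to re-validate.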

#[derive(Error, Debug)]
pub enum ValidationError {
    #[error("`best_of` must be > 0 and <= {0}. Given: {1}")]
    BestOf(usize, usize),
    #[error("`best_of` != 1 is not allowed for this endpoint")]
    BestOfDisabled,
    #[error("you must use sampling when `best_of` is > 1")]
    BestOfSampling,
    #[error("`seed` must not be set when `best_of` > 1")]
    BestOfSeed,
    #[error("`best_of` != 1 is not supported when streaming tokens")]
    BestOfStream,
    #[error("`top_n_tokens` must be >= 0 and <= {0}. Given: {1}")]
    TopNTokens(u32, u32),
    #[error("`top_n_tokens` != 0 is not allowed for this endpoint")]
    TopNTokensDisabled,
    #[error("`decoder_input_details` == true is not supported when streaming tokens")]
    PrefillDetailsStream,
    #[error("`temperature` must be strictly positive")]
    Temperature,
    #[error("`repetition_penalty` must be strictly positive")]
    RepetitionPenalty,
    #[error("`frequency_penalty` must be >= -2.0 and <= 2.0")]
    FrequencyPenalty,
    #[error("`top_p` must be > 0.0 and < 1.0")]
    TopP,
    #[error("`top_k` must be strictly positive")]
    TopK,
    #[error("`truncate` must be strictly positive and less than {0}. Given: {1}")]
    Truncate(usize, usize),
    #[error("`typical_p` must be > 0.0 and < 1.0")]
    TypicalP,
    #[error("one of `max_new_tokens` or `truncate` must be set if a fast tokenizer is not in use")]
    UnsetMaxNewTokens,
    #[error("`max_new_tokens` must be strictly positive")]
    NegativeMaxNewTokens,
    #[error("`max_new_tokens` must be <= {0}. Given: {1}")]
    MaxNewTokens(usize, u32),
    #[error("`inputs` tokens + `max_new_tokens` must be <= {0}. Given: {1} `inputs` tokens and {2} `max_new_tokens`")]
    MaxTotalTokens(usize, usize, u32),
    #[error("`inputs` must have less than {0} tokens. Given: {1}")]
    InputLength(usize, usize),
    #[error("`inputs` cannot be empty")]
    EmptyInput,
    #[error("`stop` supports up to {0} stop sequences. Given: {1}")]
    StopSequence(usize, usize),
    #[error("tokenizer error {0}")]
    Tokenizer(String),
    #[error("grammar is not supported")]
    Grammar,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::default_parameters;
    use crate::tests::get_tokenizer;

    #[tokio::test]
    async fn test_validation_max_new_tokens() {
        let tokenizer = None;
        let max_best_of = 2;
        let max_stop_sequences = 3;
        let max_top_n_tokens = 4;
        let max_input_length = 5;
        let max_total_tokens = 6;
        let workers = 1;
        let disable_grammar_support = true;
        let validation = Validation::new(
            workers,
            tokenizer,
            max_best_of,
            max_stop_sequences,
            max_top_n_tokens,
            max_input_length,
            max_total_tokens,
            disable_grammar_support,
        );

        let max_new_tokens = 10;
        match validation
            .validate_input("Hello".to_string(), None, Some(max_new_tokens))
            .await
        {
            Err(ValidationError::MaxNewTokens(1, 10)) => (),
            _ => panic!("Unexpected result: expected MaxNewTokens error"),
        }
    }

    #[tokio::test]
    async fn test_validation_input_length() {
        let tokenizer = Some(get_tokenizer().await);
        let max_best_of = 2;
        let max_stop_sequences = 3;
        let max_top_n_tokens = 4;
        let max_input_length = 5;
        let max_total_tokens = 6;
        let disable_grammar_support = true;
        let workers = 1;
        let validation = Validation::new(
            workers,
            tokenizer,
            max_best_of,
            max_stop_sequences,
            max_top_n_tokens,
            max_input_length,
            max_total_tokens,
            disable_grammar_support,
        );

        let max_new_tokens = 10;
        match validation
            .validate_input("Hello".to_string(), None, Some(max_new_tokens))
            .await
        {
            Err(ValidationError::MaxTotalTokens(6, 1, 10)) => (),
            _ => panic!("Unexpected result: expected MaxTotalTokens error"),
        }
    }

    #[tokio::test]
    async fn test_validation_best_of_sampling() {
        let tokenizer = Some(get_tokenizer().await);
        let max_best_of = 2;
        let max_stop_sequences = 3;
        let max_top_n_tokens = 4;
        let max_input_length = 5;
        let max_total_tokens = 6;
        let workers = 1;
        let disable_grammar_support = true;
        let validation = Validation::new(
            workers,
            tokenizer,
            max_best_of,
            max_stop_sequences,
            max_top_n_tokens,
            max_input_length,
            max_total_tokens,
            disable_grammar_support,
        );
        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    best_of: Some(2),
                    do_sample: false,
                    ..default_parameters()
                },
            })
            .await
        {
            Err(ValidationError::BestOfSampling) => (),
            _ => panic!("Unexpected result: expected BestOfSampling error"),
        }
    }

    #[tokio::test]
    async fn test_validation_top_p() {
        let tokenizer = Some(get_tokenizer().await);
        let max_best_of = 2;
        let max_stop_sequences = 3;
        let max_top_n_tokens = 4;
        let max_input_length = 5;
        let max_total_tokens = 106;
        let workers = 1;
        let disable_grammar_support = true;
        let validation = Validation::new(
            workers,
            tokenizer,
            max_best_of,
            max_stop_sequences,
            max_top_n_tokens,
            max_input_length,
            max_total_tokens,
            disable_grammar_support,
        );
        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_p: Some(1.0),
                    max_new_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
        {
            Err(ValidationError::TopP) => (),
            _ => panic!("Unexpected result: expected TopP error"),
        }

        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_p: Some(0.99),
                    max_new_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
        {
            Ok(_) => (),
            _ => panic!("Unexpected TopP error"),
        }

        let valid_request = validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_p: None,
                    max_new_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
            .unwrap();
        // top_p == 1.0 is invalid for users to ask for, but it is the default resolved value.
        assert_eq!(valid_request.parameters.top_p, 1.0);
    }

    #[tokio::test]
    async fn test_validation_top_n_tokens() {
        let tokenizer = Some(get_tokenizer().await);
        let max_best_of = 2;
        let max_stop_sequences = 3;
        let max_top_n_tokens = 4;
        let max_input_length = 5;
        let max_total_tokens = 106;
        let workers = 1;
        let disable_grammar_support = true;
        let validation = Validation::new(
            workers,
            tokenizer,
            max_best_of,
            max_stop_sequences,
            max_top_n_tokens,
            max_input_length,
            max_total_tokens,
            disable_grammar_support,
        );
        match validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: Some(5),
                    max_new_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
        {
            Err(ValidationError::TopNTokens(4, 5)) => (),
            _ => panic!("Unexpected result: expected TopNTokens error"),
        }

        validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: Some(4),
                    max_new_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
            .unwrap();

        validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: Some(0),
                    max_new_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
            .unwrap();

        let valid_request = validation
            .validate(GenerateRequest {
                inputs: "Hello".to_string(),
                parameters: GenerateParameters {
                    top_n_tokens: None,
                    max_new_tokens: Some(5),
                    ..default_parameters()
                },
            })
            .await
            .unwrap();

        assert_eq!(valid_request.top_n_tokens, 0);
    }
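
    // An added test sketch (not part of the original suite): with
    // `max_best_of = 1`, `validate_best_of` must reject any `best_of != 1`
    // with `BestOfDisabled` and accept `best_of == 1` unchanged.
    #[tokio::test]
    async fn test_validation_best_of_disabled() {
        let tokenizer = None;
        let validation = Validation::new(
            /* workers */ 1,
            tokenizer,
            /* max_best_of */ 1,
            /* max_stop_sequences */ 3,
            /* max_top_n_tokens */ 4,
            /* max_input_length */ 5,
            /* max_total_tokens */ 6,
            /* disable_grammar_support */ true,
        );

        match validation.validate_best_of(2) {
            Err(ValidationError::BestOfDisabled) => (),
            _ => panic!("Unexpected result: expected BestOfDisabled error"),
        }
        assert_eq!(validation.validate_best_of(1).unwrap(), 1);
    }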
}