Remove the default max_tokens for /v1/chat/completions (#251)

This commit is contained in:
Sun Choi 2024-12-16 00:32:57 -08:00 committed by GitHub
parent cc2ca4ac22
commit 61309b2832
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 4 additions and 6 deletions

View File

@@ -15,7 +15,7 @@ authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
 [workspace.dependencies]
-tokenizers = { version = "0.19.1", features = ["http"] }
+tokenizers = { version = "0.20.0", features = ["http"] }
 hf-hub = { version = "0.3.1", features = ["tokio"] }
 [profile.release]

View File

@@ -155,7 +155,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 // We need to download it outside of the Tokio runtime
 let params = FromPretrainedParameters {
 revision,
-auth_token,
+token: auth_token,
 ..Default::default()
 };
 Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap()

View File

@@ -608,7 +608,6 @@ async fn completions(
 ..
 } = req;
-let max_new_tokens = max_tokens.or(Some(100));
 let stop = stop.unwrap_or_default();
 // enable greedy only when temperature is 0
 let (do_sample, temperature) = match temperature {
@@ -657,7 +656,7 @@ async fn completions(
 top_p: req.top_p,
 typical_p: None,
 do_sample,
-max_new_tokens,
+max_new_tokens: max_tokens,
 return_full_text: None,
 stop: stop.clone(),
 truncate: None,
@@ -1019,7 +1018,6 @@ async fn chat_completions(
 } = req;
 let repetition_penalty = presence_penalty.map(|x| x + 2.0);
-let max_new_tokens = max_tokens.or(Some(100));
 let logprobs = logprobs.unwrap_or(false);
 let tool_prompt = tool_prompt.unwrap_or_default();
 let stop = stop.unwrap_or_default();
@@ -1081,7 +1079,7 @@ async fn chat_completions(
 top_p: req.top_p,
 typical_p: None,
 do_sample,
-max_new_tokens,
+max_new_tokens: max_tokens,
 return_full_text: None,
 stop,
 truncate: None,