mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
Remove the default max_tokens for /v1/chat/completions (#251)
This commit is contained in:
parent
cc2ca4ac22
commit
61309b2832
@@ -15,7 +15,7 @@ authors = ["Olivier Dehaene"]
|
||||
homepage = "https://github.com/huggingface/text-generation-inference"
|
||||
|
||||
[workspace.dependencies]
|
||||
tokenizers = { version = "0.19.1", features = ["http"] }
|
||||
tokenizers = { version = "0.20.0", features = ["http"] }
|
||||
hf-hub = { version = "0.3.1", features = ["tokio"] }
|
||||
|
||||
[profile.release]
|
||||
|
@@ -155,7 +155,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// We need to download it outside of the Tokio runtime
|
||||
let params = FromPretrainedParameters {
|
||||
revision,
|
||||
auth_token,
|
||||
token: auth_token,
|
||||
..Default::default()
|
||||
};
|
||||
Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap()
|
||||
|
@@ -608,7 +608,6 @@ async fn completions(
|
||||
..
|
||||
} = req;
|
||||
|
||||
let max_new_tokens = max_tokens.or(Some(100));
|
||||
let stop = stop.unwrap_or_default();
|
||||
// enable greedy only when temperature is 0
|
||||
let (do_sample, temperature) = match temperature {
|
||||
@@ -657,7 +656,7 @@ async fn completions(
|
||||
top_p: req.top_p,
|
||||
typical_p: None,
|
||||
do_sample,
|
||||
max_new_tokens,
|
||||
max_new_tokens: max_tokens,
|
||||
return_full_text: None,
|
||||
stop: stop.clone(),
|
||||
truncate: None,
|
||||
@@ -1019,7 +1018,6 @@ async fn chat_completions(
|
||||
} = req;
|
||||
|
||||
let repetition_penalty = presence_penalty.map(|x| x + 2.0);
|
||||
let max_new_tokens = max_tokens.or(Some(100));
|
||||
let logprobs = logprobs.unwrap_or(false);
|
||||
let tool_prompt = tool_prompt.unwrap_or_default();
|
||||
let stop = stop.unwrap_or_default();
|
||||
@@ -1081,7 +1079,7 @@ async fn chat_completions(
|
||||
top_p: req.top_p,
|
||||
typical_p: None,
|
||||
do_sample,
|
||||
max_new_tokens,
|
||||
max_new_tokens: max_tokens,
|
||||
return_full_text: None,
|
||||
stop,
|
||||
truncate: None,
|
||||
|
Loading…
Reference in New Issue
Block a user