mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 13:02:12 +00:00
Remove the default max_tokens for /v1/chat/completions (#251)
This commit is contained in:
parent
cc2ca4ac22
commit
61309b2832
@ -15,7 +15,7 @@ authors = ["Olivier Dehaene"]
|
|||||||
homepage = "https://github.com/huggingface/text-generation-inference"
|
homepage = "https://github.com/huggingface/text-generation-inference"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
tokenizers = { version = "0.19.1", features = ["http"] }
|
tokenizers = { version = "0.20.0", features = ["http"] }
|
||||||
hf-hub = { version = "0.3.1", features = ["tokio"] }
|
hf-hub = { version = "0.3.1", features = ["tokio"] }
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
|
@ -155,7 +155,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
// We need to download it outside of the Tokio runtime
|
// We need to download it outside of the Tokio runtime
|
||||||
let params = FromPretrainedParameters {
|
let params = FromPretrainedParameters {
|
||||||
revision,
|
revision,
|
||||||
auth_token,
|
token: auth_token,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap()
|
Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap()
|
||||||
|
@ -608,7 +608,6 @@ async fn completions(
|
|||||||
..
|
..
|
||||||
} = req;
|
} = req;
|
||||||
|
|
||||||
let max_new_tokens = max_tokens.or(Some(100));
|
|
||||||
let stop = stop.unwrap_or_default();
|
let stop = stop.unwrap_or_default();
|
||||||
// enable greedy only when temperature is 0
|
// enable greedy only when temperature is 0
|
||||||
let (do_sample, temperature) = match temperature {
|
let (do_sample, temperature) = match temperature {
|
||||||
@ -657,7 +656,7 @@ async fn completions(
|
|||||||
top_p: req.top_p,
|
top_p: req.top_p,
|
||||||
typical_p: None,
|
typical_p: None,
|
||||||
do_sample,
|
do_sample,
|
||||||
max_new_tokens,
|
max_new_tokens: max_tokens,
|
||||||
return_full_text: None,
|
return_full_text: None,
|
||||||
stop: stop.clone(),
|
stop: stop.clone(),
|
||||||
truncate: None,
|
truncate: None,
|
||||||
@ -1019,7 +1018,6 @@ async fn chat_completions(
|
|||||||
} = req;
|
} = req;
|
||||||
|
|
||||||
let repetition_penalty = presence_penalty.map(|x| x + 2.0);
|
let repetition_penalty = presence_penalty.map(|x| x + 2.0);
|
||||||
let max_new_tokens = max_tokens.or(Some(100));
|
|
||||||
let logprobs = logprobs.unwrap_or(false);
|
let logprobs = logprobs.unwrap_or(false);
|
||||||
let tool_prompt = tool_prompt.unwrap_or_default();
|
let tool_prompt = tool_prompt.unwrap_or_default();
|
||||||
let stop = stop.unwrap_or_default();
|
let stop = stop.unwrap_or_default();
|
||||||
@ -1081,7 +1079,7 @@ async fn chat_completions(
|
|||||||
top_p: req.top_p,
|
top_p: req.top_p,
|
||||||
typical_p: None,
|
typical_p: None,
|
||||||
do_sample,
|
do_sample,
|
||||||
max_new_tokens,
|
max_new_tokens: max_tokens,
|
||||||
return_full_text: None,
|
return_full_text: None,
|
||||||
stop,
|
stop,
|
||||||
truncate: None,
|
truncate: None,
|
||||||
|
Loading…
Reference in New Issue
Block a user