Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-10 20:04:52 +00:00
fix default value

commit a6b128b293 (parent 4201a8be46)
@@ -220,8 +220,14 @@ fn main() -> Result<(), RouterError> {
         .map_err(RouterError::Warmup)?
     {
         // Older models do not support automatic max-batch-total-tokens
-        None => max_batch_total_tokens
-            .unwrap_or(16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens))),
+        None => {
+            let max_batch_total_tokens = max_batch_total_tokens.unwrap_or(
+                16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens)),
+            );
+            tracing::warn!("Model does not support automatic max batch total tokens");
+            tracing::warn!("Setting max batch total tokens to {max_batch_total_tokens}");
+            max_batch_total_tokens
+        }
         // Flash attention models return their max supported total tokens
         Some(max_supported_batch_total_tokens) => {
             // Warn if user added his own max-batch-total-tokens as we will ignore it
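
In effect, the change keeps the old fallback formula but binds the result to a local so it can be logged before the match arm returns it. Below is a minimal sketch of the resulting defaulting rule, with the values pulled out as plain function parameters that only mirror the router's flags; the helper itself is hypothetical and the real logic lives inside the match shown above.

// Hypothetical helper illustrating the new `None` arm's defaulting rule;
// parameter names follow the router's flags but this is only a sketch.
fn default_max_batch_total_tokens(
    max_batch_total_tokens: Option<u32>,
    max_total_tokens: usize,
    max_batch_prefill_tokens: u32,
) -> u32 {
    // Fall back to at least 16000 tokens, never below the largest single
    // request (max_total_tokens) or the prefill budget (max_batch_prefill_tokens).
    max_batch_total_tokens
        .unwrap_or(16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens)))
}

fn main() {
    // No value given and small model limits -> the 16000 floor wins.
    assert_eq!(default_max_batch_total_tokens(None, 2048, 4096), 16000);
    // An explicitly provided value always wins over the computed floor.
    assert_eq!(default_max_batch_total_tokens(Some(32000), 2048, 4096), 32000);
    println!("defaults resolved as expected");
}

Logging the resolved value is the point of the new block: older models cannot report a supported batch size themselves, so the effective limit is only visible to the operator through these two warnings.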