fix default value

This commit is contained in:
OlivierDehaene 2023-07-18 11:41:10 +02:00
parent 4201a8be46
commit a6b128b293

View File

@ -220,8 +220,14 @@ fn main() -> Result<(), RouterError> {
.map_err(RouterError::Warmup)?
{
// Older models do not support automatic max-batch-total-tokens
None => max_batch_total_tokens None => {
.unwrap_or(16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens))), let max_batch_total_tokens = max_batch_total_tokens.unwrap_or(
16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens)),
);
tracing::warn!("Model does not support automatic max batch total tokens");
tracing::warn!("Setting max batch total tokens to {max_batch_total_tokens}");
max_batch_total_tokens
}
// Flash attention models return their max supported total tokens
Some(max_supported_batch_total_tokens) => {
// Warn if user added his own max-batch-total-tokens as we will ignore it