From a6b128b293a323973a33637a089912cb3b9795b6 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Tue, 18 Jul 2023 11:41:10 +0200
Subject: [PATCH] fix default value

---
 router/src/main.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/router/src/main.rs b/router/src/main.rs
index 03e920c6..6449e647 100644
--- a/router/src/main.rs
+++ b/router/src/main.rs
@@ -220,8 +220,14 @@ fn main() -> Result<(), RouterError> {
         .map_err(RouterError::Warmup)?
     {
         // Older models do not support automatic max-batch-total-tokens
-        None => max_batch_total_tokens
-            .unwrap_or(16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens))),
+        None => {
+            let max_batch_total_tokens = max_batch_total_tokens.unwrap_or(
+                16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens)),
+            );
+            tracing::warn!("Model does not support automatic max batch total tokens");
+            tracing::warn!("Setting max batch total tokens to {max_batch_total_tokens}");
+            max_batch_total_tokens
+        }
         // Flash attention models return their max supported total tokens
         Some(max_supported_batch_total_tokens) => {
             // Warn if user added his own max-batch-total-tokens as we will ignore it
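
Note (editorial, not part of the patch): the hunk replaces the bare match-arm expression with a block so the chosen value can be logged before it is returned. Below is a minimal standalone sketch of the same fallback computation; the helper name resolve_max_batch_total_tokens and the exact parameter types are assumptions for illustration, not the router's actual API.

// Sketch of the fallback introduced by the patch: if the backend reports no
// supported batch size, use the explicit --max-batch-total-tokens override
// when given, otherwise the largest of 16000, max_total_tokens and
// max_batch_prefill_tokens.
fn resolve_max_batch_total_tokens(
    max_batch_total_tokens: Option<u32>,
    max_total_tokens: usize,
    max_batch_prefill_tokens: u32,
) -> u32 {
    max_batch_total_tokens
        .unwrap_or(16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens)))
}

fn main() {
    // No override, small model limits: the 16000 floor wins.
    assert_eq!(resolve_max_batch_total_tokens(None, 2048, 4096), 16000);
    // No override, prefill budget above the floor: the prefill budget wins.
    assert_eq!(resolve_max_batch_total_tokens(None, 2048, 32768), 32768);
    // An explicit override always wins.
    assert_eq!(resolve_max_batch_total_tokens(Some(8192), 2048, 4096), 8192);
    println!("fallback logic behaves as expected");
}

In other words, the default is never below 16000 tokens and never below either the per-request total-token limit or the prefill-batch limit, while a user-supplied value still takes precedence.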