This commit is contained in:
Nicolas Patry 2024-02-08 15:04:27 +00:00
parent 29a8d5a3a1
commit 40f693b6b9
4 changed files with 12 additions and 5 deletions

View File

@ -354,6 +354,14 @@ Options:
[env: NGROK_EDGE=] [env: NGROK_EDGE=]
```
## BATCH_DIMENSION
```shell
--batch-dimension
Specific flag for hardware targets that do not support unpadded inference. For those we do not send the tokenizer to the router so that all the scheduling assumes those pad tokens exist (and potentially even more).
[env: BATCH_DIMENSION=]
``` ```
## TOKENIZER_CONFIG_PATH ## TOKENIZER_CONFIG_PATH
```shell ```shell

View File

@ -1040,7 +1040,7 @@ fn spawn_webserver(
args.model_id, args.model_id,
]; ];
if args.batch_dimension{ if args.batch_dimension {
router_args.push("--batch-dimension".to_string()); router_args.push("--batch-dimension".to_string());
} }

View File

@ -834,7 +834,7 @@ pub async fn run(
max_top_n_tokens, max_top_n_tokens,
max_input_length, max_input_length,
max_total_tokens, max_total_tokens,
batch_dimension batch_dimension,
); );
let generation_health = Arc::new(AtomicBool::new(false)); let generation_health = Arc::new(AtomicBool::new(false));
let health_ext = Health::new(client.clone(), generation_health.clone()); let health_ext = Health::new(client.clone(), generation_health.clone());

View File

@ -69,7 +69,7 @@ impl Validation {
max_top_n_tokens, max_top_n_tokens,
max_input_length, max_input_length,
max_total_tokens, max_total_tokens,
batch_dimension batch_dimension,
} }
} }
@ -107,7 +107,7 @@ impl Validation {
) -> Result<(String, usize, u32), ValidationError> { ) -> Result<(String, usize, u32), ValidationError> {
// If we have a fast tokenizer // If we have a fast tokenizer
if let Some((encoding, inputs)) = self.tokenize(inputs.clone(), truncate).await? { if let Some((encoding, inputs)) = self.tokenize(inputs.clone(), truncate).await? {
if self.batch_dimension{ if self.batch_dimension {
let input_length = encoding.len(); let input_length = encoding.len();
// Get total tokens // Get total tokens
@ -135,7 +135,6 @@ impl Validation {
)); ));
} }
//
metrics::histogram!("tgi_request_input_length", input_length as f64); metrics::histogram!("tgi_request_input_length", input_length as f64);
return Ok((inputs, input_length, max_new_tokens)); return Ok((inputs, input_length, max_new_tokens));
} }