mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Merge branch 'main' into fix_replaying_requests
This commit is contained in:
commit
bed5634ead
@@ -131,7 +131,7 @@ fn validation_worker(
     }

     // Get the number of tokens in the input
-    match tokenizer.encode(request.inputs.clone(), false) {
+    match tokenizer.encode(request.inputs.clone(), true) {
         Ok(inputs) => {
             let input_length = inputs.len();
@@ -65,7 +65,7 @@ class CausalLMBatch:
        )
        all_logprobs.append(None)

-        pad_to_multiple_of = 8 if "gpu" in str(device) else None
+        pad_to_multiple_of = 8 if device.type == "cuda" else None
        tokenized_inputs = tokenizer(
            inputs,
            return_tensors="pt",
@@ -77,7 +77,7 @@ class Seq2SeqLMBatch:
            decoder_logprobs.append(None)

        # Tokenize batch
-        pad_to_multiple_of = 8 if "gpu" in str(device) else None
+        pad_to_multiple_of = 8 if device.type == "cuda" else None
        tokenized_inputs = tokenizer(
            inputs,
            return_tensors="pt",
Loading…
Reference in New Issue
Block a user