diff --git a/router/src/validation.rs b/router/src/validation.rs
index ff659b3e..f6da1913 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -131,7 +131,7 @@ fn validation_worker(
         }
 
         // Get the number of tokens in the input
-        match tokenizer.encode(request.inputs.clone(), false) {
+        match tokenizer.encode(request.inputs.clone(), true) {
             Ok(inputs) => {
                 let input_length = inputs.len();
 
diff --git a/server/text_generation/models/causal_lm.py b/server/text_generation/models/causal_lm.py
index 6c567069..b352eb6b 100644
--- a/server/text_generation/models/causal_lm.py
+++ b/server/text_generation/models/causal_lm.py
@@ -65,7 +65,7 @@ class CausalLMBatch:
             )
             all_logprobs.append(None)
 
-        pad_to_multiple_of = 8 if "gpu" in str(device) else None
+        pad_to_multiple_of = 8 if device.type == "cuda" else None
         tokenized_inputs = tokenizer(
             inputs,
             return_tensors="pt",
diff --git a/server/text_generation/models/seq2seq_lm.py b/server/text_generation/models/seq2seq_lm.py
index fc80c60c..4095db92 100644
--- a/server/text_generation/models/seq2seq_lm.py
+++ b/server/text_generation/models/seq2seq_lm.py
@@ -77,7 +77,7 @@ class Seq2SeqLMBatch:
             decoder_logprobs.append(None)
 
         # Tokenize batch
-        pad_to_multiple_of = 8 if "gpu" in str(device) else None
+        pad_to_multiple_of = 8 if device.type == "cuda" else None
         tokenized_inputs = tokenizer(
             inputs,
             return_tensors="pt",