diff --git a/benchmark/src/main.rs b/benchmark/src/main.rs index 2ee3d7c5..2ec9c882 100644 --- a/benchmark/src/main.rs +++ b/benchmark/src/main.rs @@ -155,7 +155,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { // We need to download it outside of the Tokio runtime let params = FromPretrainedParameters { revision, - auth_token, + token: auth_token, ..Default::default() }; Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap() diff --git a/examples/README.md b/examples/README.md index e605364e..226595c6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -22,7 +22,7 @@ To run benchmark use below command: python run_generation --model_id MODEL_ID ``` where `MODEL_ID` should be set to the same value as in the TGI server instance. -> For gated models such as [LLama](https://huggingface.co/meta-llama) or [StarCoder](https://huggingface.co/bigcode/starcoder), you will have to set environment variable `HUGGING_FACE_HUB_TOKEN=` with a valid Hugging Face Hub read token. +> For gated models such as [Llama](https://huggingface.co/meta-llama) or [StarCoder](https://huggingface.co/bigcode/starcoder), you will have to set environment variable `HF_TOKEN=` with a valid Hugging Face Hub read token. All possible parameters are described in the below table:
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index eb55ebb9..14523cd4 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -32,7 +32,7 @@ from text_generation.types import ( ) DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None) -HF_TOKEN = os.getenv("HF_TOKEN", None) +HUGGING_FACE_HUB_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data") DOCKER_DEVICES = os.getenv("DOCKER_DEVICES") @@ -498,8 +498,8 @@ def launcher(event_loop): if attention is not None: env["ATTENTION"] = attention - if HF_TOKEN is not None: - env["HF_TOKEN"] = HF_TOKEN + if HUGGING_FACE_HUB_TOKEN is not None: + env["HF_TOKEN"] = HUGGING_FACE_HUB_TOKEN volumes = [] if DOCKER_VOLUME: diff --git a/router/src/server.rs b/router/src/server.rs index 15eb1db1..1a909f0f 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -689,7 +689,6 @@ async fn completions( .. } = req; - let max_new_tokens = max_tokens.or(Some(100)); let stop = stop.unwrap_or_default(); // enable greedy only when temperature is 0 let (do_sample, temperature) = match temperature { @@ -740,7 +739,7 @@ async fn completions( top_p: req.top_p, typical_p: None, do_sample, - max_new_tokens, + max_new_tokens: max_tokens, return_full_text: None, stop: stop.clone(), truncate: None,