diff --git a/backends/neuron/tests/server/test_decode.py b/backends/neuron/tests/server/test_decode.py
index 94d1e95e..b864e3ec 100644
--- a/backends/neuron/tests/server/test_decode.py
+++ b/backends/neuron/tests/server/test_decode.py
@@ -40,7 +40,7 @@ def _test_decode(config_name, generator, do_sample):
     assert output.finish_reason == 0
     if do_sample:
         expected_text = {
-            "llama": " In the stillness of the morning",
+            "llama": " I sat alone in the café",
             "qwen2": " The air was so still",
             "granite": "1984, George Orwell",
         }[config_name]
diff --git a/backends/neuron/tests/server/test_prefill.py b/backends/neuron/tests/server/test_prefill.py
index 48fd62ba..c9ecd1c8 100644
--- a/backends/neuron/tests/server/test_prefill.py
+++ b/backends/neuron/tests/server/test_prefill.py
@@ -46,7 +46,7 @@ def _test_prefill(config_name, generator, batch_size, do_sample):
     assert len(generations) == batch_size
     if do_sample:
         expectations = {
-            "llama": [763, " In"],
+            "llama": [358, " I"],
             "qwen2": [576, " The"],
             "granite": [308, " ("],
         }[config_name]
@@ -87,10 +87,12 @@ def test_prefill_truncate(neuron_model_config):
     # Even if the input text is identical for all requests, the first generated token might
     # be different because of the truncation
     expectations = {
-        "llama": [" He", " The", " He", " He"],
+        "llama": [" He", "iens", "\x08", " He"],
         "qwen2": [" He", " The", " He", " He"],
         "granite": ["\n", "\n", " I", " He"],
     }[config_name]
     for i, g in enumerate(generations):
         tokens = g.tokens
-        assert tokens.texts[0] == expectations[i]
+        assert (
+            tokens.texts[0] == expectations[i]
+        ), f"Request {i} expected [{expectations[i]}], got [{tokens.texts[0]}]"
diff --git a/integration-tests/neuron/test_generate.py b/integration-tests/neuron/test_generate.py
index 555b4eaa..9108ce0e 100644
--- a/integration-tests/neuron/test_generate.py
+++ b/integration-tests/neuron/test_generate.py
@@ -20,7 +20,7 @@ async def test_model_single_request(tgi_service):
     )
     assert response.details.generated_tokens == 17
     greedy_expectations = {
-        "llama": " and How Does it Work?\nDeep learning is a subset of machine learning that uses artificial",
+        "llama": " and how does it work?\nDeep learning is a subset of machine learning that uses artificial",
         "qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
         "granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
     }