From 7d6ff64c133353d69f0d1a9c8e8eba2d5d72d97a Mon Sep 17 00:00:00 2001
From: David Corvoysier <david@huggingface.co>
Date: Wed, 19 Feb 2025 14:51:43 +0000
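Subject: [PATCH] test(neuron): use smaller llama model

Replace NousResearch/Hermes-2-Theta-Llama-3-8B with
unsloth/Llama-3.2-1B-Instruct as the "llama" entry of the neuron test
model configurations, and update the expected llama outputs in
test_generate.py (single-request and multiple-requests tests) to match
the text generated by the new model.
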
---
 integration-tests/fixtures/neuron/model.py            | 2 +-
 integration-tests/neuron/integration/test_generate.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/integration-tests/fixtures/neuron/model.py b/integration-tests/fixtures/neuron/model.py
index 676cb093..eb109121 100644
--- a/integration-tests/fixtures/neuron/model.py
+++ b/integration-tests/fixtures/neuron/model.py
@@ -32,7 +32,7 @@ MODEL_CONFIGURATIONS = {
         "export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
     },
     "llama": {
-        "model_id": "NousResearch/Hermes-2-Theta-Llama-3-8B",
+        "model_id": "unsloth/Llama-3.2-1B-Instruct",
         "export_kwargs": {"batch_size": 4, "sequence_length": 2048, "num_cores": 2, "auto_cast_type": "fp16"},
     },
     "mistral": {
diff --git a/integration-tests/neuron/integration/test_generate.py b/integration-tests/neuron/integration/test_generate.py
index 80c83623..96f09838 100644
--- a/integration-tests/neuron/integration/test_generate.py
+++ b/integration-tests/neuron/integration/test_generate.py
@@ -21,7 +21,7 @@ async def test_model_single_request(tgi_service):
     assert response.details.generated_tokens == 17
     greedy_expectations = {
         "gpt2": "\n\nDeep learning is a new field of research that has been around for a while",
-        "llama": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
+        "llama": " and How Does it Work?\nDeep learning is a subset of machine learning that uses artificial",
         "mistral": "\nWhat is Deep Learning?\nDeep Learning is a type of machine learning that",
         "qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
         "granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
@@ -82,7 +82,7 @@ async def test_model_multiple_requests(tgi_service, neuron_generate_load):
     assert len(responses) == 4
     expectations = {
         "gpt2": "Deep learning is a new field of research that has been around for a while",
-        "llama": "Deep learning is a subset of machine learning that involves the use",
+        "llama": "Deep learning is a subset of machine learning that uses artificial",
         "mistral": "Deep Learning is a type of machine learning that",
         "qwen2": "Deep Learning is a subset of Machine Learning that is based on",
         "granite": "Deep Learning is a subset of Machine Learning, which is a branch of Art",