test(neuron): adjust expectations

Since the latest optimum-neuron uses a new modeling implementation for granite and qwen, the greedy outputs are slightly different.
2025-09-09 11:24:53 +00:00 · 2025-06-27 12:31:31 +00:00 · 2025-06-27 12:31:31 +00:00 · 83b766f9ef
commit 83b766f9ef
parent c45977e48b
1 changed files with 4 additions and 4 deletions
--- a/integration-tests/neuron/test_generate.py
+++ b/integration-tests/neuron/test_generate.py
@ -21,8 +21,8 @@ async def test_model_single_request(tgi_service):
    assert response.details.generated_tokens == 17
    greedy_expectations = {
        "llama": " and how does it work?\nDeep learning is a subset of machine learning that uses artificial",
-        "qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
+        "qwen2": " - Deep Learning is a subset of Machine Learning that involves the use of artificial neural networks",
-        "granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
+        "granite": "\n\nDeep learning is a subset of machine learning techniques based on artificial neural networks",
    }
    assert response.generated_text == greedy_expectations[service_name]
@ -78,8 +78,8 @@ async def test_model_multiple_requests(tgi_service, neuron_generate_load):
    assert len(responses) == 4
    expectations = {
        "llama": "Deep learning is a subset of machine learning that uses artificial",
-        "qwen2": "Deep Learning is a subset of Machine Learning that is based on",
+        "qwen2": "Deep Learning is a subset of Machine Learning that involves",
-        "granite": "Deep Learning is a subset of Machine Learning, which is a branch of Art",
+        "granite": "Deep learning is a subset of machine learning techniques",
    }
    expected = expectations[tgi_service.client.service_name]
    for r in responses: