fix(neuron): adjust test expectations for llama on nxd

David Corvoysier 2025-05-26 13:55:20 +00:00
parent 2c8b0e37c4
commit d5bad17ed6
3 changed files with 7 additions and 5 deletions


@@ -40,7 +40,7 @@ def _test_decode(config_name, generator, do_sample):
     assert output.finish_reason == 0
     if do_sample:
         expected_text = {
-            "llama": " In the stillness of the morning",
+            "llama": " I sat alone in the café",
             "qwen2": " The air was so still",
             "granite": "1984, George Orwell",
         }[config_name]
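
Note on the change above: with do_sample=True the continuation is only reproducible because the test fixes the RNG seed, and moving llama inference to NxD (NeuronX Distributed) changes the numerical path enough to produce a different sampled trajectory. A minimal sketch of the seeded-sampling pattern, assuming a plain transformers model and an illustrative seed and length (the actual generator fixture is not shown in this diff):

from transformers import set_seed

def sample_once(model, inputs, seed=42):
    # With a fixed seed, sampling is deterministic for a given backend, but
    # a kernel change (e.g. llama moving to NxD) shifts the logits and can
    # yield a different continuation, which is what this hunk re-pins.
    set_seed(seed)  # seeds the Python, NumPy and torch RNGs in one call
    return model.generate(**inputs, do_sample=True, max_new_tokens=8)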


@@ -46,7 +46,7 @@ def _test_prefill(config_name, generator, batch_size, do_sample):
     assert len(generations) == batch_size
     if do_sample:
         expectations = {
-            "llama": [763, " In"],
+            "llama": [358, " I"],
             "qwen2": [576, " The"],
             "granite": [308, " ("],
         }[config_name]
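
Each expectation in this hunk is a [token_id, token_text] pair, so both members must be updated together: for llama the first sampled token moves from 763 (" In") to 358 (" I"). A quick self-consistency check for such a pair, sketched with an assumed model id since the config-to-checkpoint mapping is not part of this diff:

from transformers import AutoTokenizer

def check_pair(model_id: str, token_id: int, text: str) -> None:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Decoding the single id should reproduce the pinned text fragment.
    # Some tokenizers strip a leading space on decode, so treat this as a
    # sanity check rather than an exact contract.
    decoded = tokenizer.decode([token_id])
    assert decoded == text, f"expected [{text}], got [{decoded}]"

# e.g. check_pair("meta-llama/Meta-Llama-3-8B", 358, " I")  # illustrative id
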
@@ -87,10 +87,12 @@ def test_prefill_truncate(neuron_model_config):
     # Even if the input text is identical for all requests, the first generated token might
     # be different because of the truncation
     expectations = {
-        "llama": [" He", " The", " He", " He"],
+        "llama": [" He", "iens", "\x08", " He"],
         "qwen2": [" He", " The", " He", " He"],
         "granite": ["\n", "\n", " I", " He"],
     }[config_name]
     for i, g in enumerate(generations):
         tokens = g.tokens
-        assert tokens.texts[0] == expectations[i]
+        assert (
+            tokens.texts[0] == expectations[i]
+        ), f"Request {i} expected [{expectations[i]}], got [{tokens.texts[0]}]"


@@ -20,7 +20,7 @@ async def test_model_single_request(tgi_service):
     )
     assert response.details.generated_tokens == 17
     greedy_expectations = {
-        "llama": " and How Does it Work?\nDeep learning is a subset of machine learning that uses artificial",
+        "llama": " and how does it work?\nDeep learning is a subset of machine learning that uses artificial",
         "qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
         "granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
     }
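
Unlike the sampled cases above, this test decodes greedily, so the expectation should be fully deterministic for a given model and backend; even the capitalization-level drift recorded here (" and How Does it Work?" to " and how does it work?") means NxD shifted the llama logits enough to flip some argmax choices. A sketch of regenerating such an expectation off-device, assuming a plain transformers checkpoint rather than the tgi_service fixture the test actually drives:

from transformers import AutoModelForCausalLM, AutoTokenizer

def greedy_continuation(model_id: str, prompt: str, max_new_tokens: int = 17) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    inputs = tokenizer(prompt, return_tensors="pt")
    # do_sample=False takes the argmax token at every step, so the output is
    # determined entirely by the weights and the numerical backend.
    output = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens and keep only the generated continuation.
    return tokenizer.decode(output[0, inputs["input_ids"].shape[1]:])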