mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 06:42:10 +00:00
test(neuron): use smaller llama model
This commit is contained in:
parent
6f92198eb9
commit
b5e98a6d5a
@@ -32,7 +32,7 @@ MODEL_CONFIGURATIONS = {
|
|||||||
"export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
|
"export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
|
||||||
},
|
},
|
||||||
"llama": {
|
"llama": {
|
||||||
"model_id": "NousResearch/Hermes-2-Theta-Llama-3-8B",
|
"model_id": "unsloth/Llama-3.2-1B-Instruct",
|
||||||
"export_kwargs": {"batch_size": 4, "sequence_length": 2048, "num_cores": 2, "auto_cast_type": "fp16"},
|
"export_kwargs": {"batch_size": 4, "sequence_length": 2048, "num_cores": 2, "auto_cast_type": "fp16"},
|
||||||
},
|
},
|
||||||
"mistral": {
|
"mistral": {
|
||||||
|
@@ -21,7 +21,7 @@ async def test_model_single_request(tgi_service):
|
|||||||
assert response.details.generated_tokens == 17
|
assert response.details.generated_tokens == 17
|
||||||
greedy_expectations = {
|
greedy_expectations = {
|
||||||
"gpt2": "\n\nDeep learning is a new field of research that has been around for a while",
|
"gpt2": "\n\nDeep learning is a new field of research that has been around for a while",
|
||||||
"llama": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
|
"llama": " and How Does it Work?\nDeep learning is a subset of machine learning that uses artificial",
|
||||||
"mistral": "\nWhat is Deep Learning?\nDeep Learning is a type of machine learning that",
|
"mistral": "\nWhat is Deep Learning?\nDeep Learning is a type of machine learning that",
|
||||||
"qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
|
"qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
|
||||||
"granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
|
"granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
|
||||||
@@ -82,7 +82,7 @@ async def test_model_multiple_requests(tgi_service, neuron_generate_load):
|
|||||||
assert len(responses) == 4
|
assert len(responses) == 4
|
||||||
expectations = {
|
expectations = {
|
||||||
"gpt2": "Deep learning is a new field of research that has been around for a while",
|
"gpt2": "Deep learning is a new field of research that has been around for a while",
|
||||||
"llama": "Deep learning is a subset of machine learning that involves the use",
|
"llama": "Deep learning is a subset of machine learning that uses artificial",
|
||||||
"mistral": "Deep Learning is a type of machine learning that",
|
"mistral": "Deep Learning is a type of machine learning that",
|
||||||
"qwen2": "Deep Learning is a subset of Machine Learning that is based on",
|
"qwen2": "Deep Learning is a subset of Machine Learning that is based on",
|
||||||
"granite": "Deep Learning is a subset of Machine Learning, which is a branch of Art",
|
"granite": "Deep Learning is a subset of Machine Learning, which is a branch of Art",
|
||||||
|
Loading…
Reference in New Issue
Block a user