mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 19:34:53 +00:00
test(neuron): add phi3 and qwen3 tests
This commit is contained in:
parent
83b766f9ef
commit
7eca8e584e
@ -46,6 +46,15 @@ MODEL_CONFIGURATIONS = {
|
|||||||
"auto_cast_type": "fp16",
|
"auto_cast_type": "fp16",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"qwen3": {
|
||||||
|
"model_id": "Qwen/Qwen3-1.7B",
|
||||||
|
"export_kwargs": {
|
||||||
|
"batch_size": 4,
|
||||||
|
"sequence_length": 4096,
|
||||||
|
"num_cores": 2,
|
||||||
|
"auto_cast_type": "bf16",
|
||||||
|
},
|
||||||
|
},
|
||||||
"granite": {
|
"granite": {
|
||||||
"model_id": "ibm-granite/granite-3.1-2b-instruct",
|
"model_id": "ibm-granite/granite-3.1-2b-instruct",
|
||||||
"export_kwargs": {
|
"export_kwargs": {
|
||||||
@ -55,6 +64,15 @@ MODEL_CONFIGURATIONS = {
|
|||||||
"auto_cast_type": "bf16",
|
"auto_cast_type": "bf16",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"phi3": {
|
||||||
|
"model_id": "microsoft/Phi-3-mini-4k-instruct",
|
||||||
|
"export_kwargs": {
|
||||||
|
"batch_size": 4,
|
||||||
|
"sequence_length": 4096,
|
||||||
|
"num_cores": 2,
|
||||||
|
"auto_cast_type": "bf16",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,6 +23,8 @@ async def test_model_single_request(tgi_service):
|
|||||||
"llama": " and how does it work?\nDeep learning is a subset of machine learning that uses artificial",
|
"llama": " and how does it work?\nDeep learning is a subset of machine learning that uses artificial",
|
||||||
"qwen2": " - Deep Learning is a subset of Machine Learning that involves the use of artificial neural networks",
|
"qwen2": " - Deep Learning is a subset of Machine Learning that involves the use of artificial neural networks",
|
||||||
"granite": "\n\nDeep learning is a subset of machine learning techniques based on artificial neural networks",
|
"granite": "\n\nDeep learning is a subset of machine learning techniques based on artificial neural networks",
|
||||||
|
"qwen3": " A Deep Learning is a subset of machine learning that uses neural networks with multiple layers to",
|
||||||
|
"phi3": "\n\nDeep learning is a subfield of machine learning that focuses on creating",
|
||||||
}
|
}
|
||||||
assert response.generated_text == greedy_expectations[service_name]
|
assert response.generated_text == greedy_expectations[service_name]
|
||||||
|
|
||||||
@ -80,6 +82,8 @@ async def test_model_multiple_requests(tgi_service, neuron_generate_load):
|
|||||||
"llama": "Deep learning is a subset of machine learning that uses artificial",
|
"llama": "Deep learning is a subset of machine learning that uses artificial",
|
||||||
"qwen2": "Deep Learning is a subset of Machine Learning that involves",
|
"qwen2": "Deep Learning is a subset of Machine Learning that involves",
|
||||||
"granite": "Deep learning is a subset of machine learning techniques",
|
"granite": "Deep learning is a subset of machine learning techniques",
|
||||||
|
"qwen3": "Deep Learning is a subset of machine learning that uses neural networks",
|
||||||
|
"phi3": "Deep learning is a subfield of machine learning that focuses on creating",
|
||||||
}
|
}
|
||||||
expected = expectations[tgi_service.client.service_name]
|
expected = expectations[tgi_service.client.service_name]
|
||||||
for r in responses:
|
for r in responses:
|
||||||
|
Loading…
Reference in New Issue
Block a user