mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-23 16:02:10 +00:00
change default behaviour to only run a subset of all the models
This commit is contained in:
parent
2821274a88
commit
d98ae4890e
@ -54,6 +54,11 @@ run-integration-tests:
|
|||||||
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
|
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
|
||||||
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi
|
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi
|
||||||
|
|
||||||
|
run-integration-tests-with-all-models:
|
||||||
|
DOCKER_VOLUME=${root_dir}/data \
|
||||||
|
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
|
||||||
|
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi --gaudi-all-models
|
||||||
|
|
||||||
# This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
|
# This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
|
||||||
capture-expected-outputs-for-integration-tests:
|
capture-expected-outputs-for-integration-tests:
|
||||||
DOCKER_VOLUME=${root_dir}/data \
|
DOCKER_VOLUME=${root_dir}/data \
|
||||||
|
@ -74,6 +74,12 @@ def pytest_addoption(parser):
|
|||||||
parser.addoption(
|
parser.addoption(
|
||||||
"--gaudi", action="store_true", default=False, help="run gaudi tests"
|
"--gaudi", action="store_true", default=False, help="run gaudi tests"
|
||||||
)
|
)
|
||||||
|
parser.addoption(
|
||||||
|
"--gaudi-all-models",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Run tests for all models instead of just the default subset",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def pytest_configure(config):
|
def pytest_configure(config):
|
||||||
|
@ -1,30 +1,39 @@
|
|||||||
from typing import Any, Dict
|
from typing import Any, Dict, Generator
|
||||||
|
from _pytest.fixtures import SubRequest
|
||||||
|
|
||||||
from text_generation import AsyncClient
|
from text_generation import AsyncClient
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_configure(config):
|
||||||
|
config.addinivalue_line(
|
||||||
|
"markers", "gaudi_all_models: mark test to run with all models"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different model architectures.
|
# The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different model architectures.
|
||||||
TEST_CONFIGS = {
|
TEST_CONFIGS = {
|
||||||
# "meta-llama/Llama-3.1-8B-Instruct-shared": {
|
"meta-llama/Llama-3.1-8B-Instruct-shared": {
|
||||||
# "model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
# "input": "What is Deep Learning?",
|
"input": "What is Deep Learning?",
|
||||||
# "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
|
"expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
|
||||||
# "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
|
"expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
|
||||||
# "args": [
|
"args": [
|
||||||
# "--sharded",
|
"--sharded",
|
||||||
# "true",
|
"true",
|
||||||
# "--num-shard",
|
"--num-shard",
|
||||||
# "8",
|
"8",
|
||||||
# "--max-input-tokens",
|
"--max-input-tokens",
|
||||||
# "512",
|
"512",
|
||||||
# "--max-total-tokens",
|
"--max-total-tokens",
|
||||||
# "1024",
|
"1024",
|
||||||
# "--max-batch-size",
|
"--max-batch-size",
|
||||||
# "8",
|
"8",
|
||||||
# "--max-batch-prefill-tokens",
|
"--max-batch-prefill-tokens",
|
||||||
# "2048",
|
"2048",
|
||||||
# ],
|
],
|
||||||
# },
|
"run_by_default": True,
|
||||||
|
},
|
||||||
"meta-llama/Llama-3.1-8B-Instruct": {
|
"meta-llama/Llama-3.1-8B-Instruct": {
|
||||||
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"input": "What is Deep Learning?",
|
"input": "What is Deep Learning?",
|
||||||
@ -41,196 +50,195 @@ TEST_CONFIGS = {
|
|||||||
"--max-batch-prefill-tokens",
|
"--max-batch-prefill-tokens",
|
||||||
"2048",
|
"2048",
|
||||||
],
|
],
|
||||||
|
"run_by_default": True,
|
||||||
|
},
|
||||||
|
"meta-llama/Llama-2-7b-chat-hf": {
|
||||||
|
"model_id": "meta-llama/Llama-2-7b-chat-hf",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific",
|
||||||
|
"expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
"--max-batch-prefill-tokens",
|
||||||
|
"2048",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"mistralai/Mistral-7B-Instruct-v0.3": {
|
||||||
|
"model_id": "mistralai/Mistral-7B-Instruct-v0.3",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured",
|
||||||
|
"expected_batch_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
"--max-batch-prefill-tokens",
|
||||||
|
"2048",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"bigcode/starcoder2-3b": {
|
||||||
|
"model_id": "bigcode/starcoder2-3b",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that",
|
||||||
|
"expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
"--max-batch-prefill-tokens",
|
||||||
|
"2048",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"google/gemma-7b-it": {
|
||||||
|
"model_id": "google/gemma-7b-it",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of",
|
||||||
|
"expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
"--max-batch-prefill-tokens",
|
||||||
|
"2048",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"Qwen/Qwen2-0.5B-Instruct": {
|
||||||
|
"model_id": "Qwen/Qwen2-0.5B-Instruct",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models",
|
||||||
|
"expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
"--max-batch-prefill-tokens",
|
||||||
|
"2048",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"tiiuae/falcon-7b-instruct": {
|
||||||
|
"model_id": "tiiuae/falcon-7b-instruct",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a",
|
||||||
|
"expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"microsoft/phi-1_5": {
|
||||||
|
"model_id": "microsoft/phi-1_5",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large",
|
||||||
|
"expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"openai-community/gpt2": {
|
||||||
|
"model_id": "openai-community/gpt2",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a",
|
||||||
|
"expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"EleutherAI/gpt-j-6b": {
|
||||||
|
"model_id": "EleutherAI/gpt-j-6b",
|
||||||
|
"input": "What is Deep Learning?",
|
||||||
|
"expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by",
|
||||||
|
"expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by",
|
||||||
|
"args": [
|
||||||
|
"--max-input-tokens",
|
||||||
|
"512",
|
||||||
|
"--max-total-tokens",
|
||||||
|
"1024",
|
||||||
|
"--max-batch-size",
|
||||||
|
"4",
|
||||||
|
],
|
||||||
},
|
},
|
||||||
# "meta-llama/Llama-2-7b-chat-hf": {
|
|
||||||
# "model_id": "meta-llama/Llama-2-7b-chat-hf",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific",
|
|
||||||
# "expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# "--max-batch-prefill-tokens",
|
|
||||||
# "2048",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "mistralai/Mistral-7B-Instruct-v0.3": {
|
|
||||||
# "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured",
|
|
||||||
# "expected_batch_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# "--max-batch-prefill-tokens",
|
|
||||||
# "2048",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "bigcode/starcoder2-3b": {
|
|
||||||
# "model_id": "bigcode/starcoder2-3b",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that",
|
|
||||||
# "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# "--max-batch-prefill-tokens",
|
|
||||||
# "2048",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "google/gemma-7b-it": {
|
|
||||||
# "model_id": "google/gemma-7b-it",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of",
|
|
||||||
# "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# "--max-batch-prefill-tokens",
|
|
||||||
# "2048",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "Qwen/Qwen2-0.5B-Instruct": {
|
|
||||||
# "model_id": "Qwen/Qwen2-0.5B-Instruct",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models",
|
|
||||||
# "expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# "--max-batch-prefill-tokens",
|
|
||||||
# "2048",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "tiiuae/falcon-7b-instruct": {
|
|
||||||
# "model_id": "tiiuae/falcon-7b-instruct",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a",
|
|
||||||
# "expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "microsoft/phi-1_5": {
|
|
||||||
# "model_id": "microsoft/phi-1_5",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large",
|
|
||||||
# "expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "openai-community/gpt2": {
|
|
||||||
# "model_id": "openai-community/gpt2",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a",
|
|
||||||
# "expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "facebook/opt-125m": {
|
|
||||||
# "model_id": "facebook/opt-125m",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout",
|
|
||||||
# "expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
# "EleutherAI/gpt-j-6b": {
|
|
||||||
# "model_id": "EleutherAI/gpt-j-6b",
|
|
||||||
# "input": "What is Deep Learning?",
|
|
||||||
# "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by",
|
|
||||||
# "expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by",
|
|
||||||
# "args": [
|
|
||||||
# "--max-input-tokens",
|
|
||||||
# "512",
|
|
||||||
# "--max-total-tokens",
|
|
||||||
# "1024",
|
|
||||||
# "--max-batch-size",
|
|
||||||
# "4",
|
|
||||||
# ],
|
|
||||||
# },
|
|
||||||
}
|
}
|
||||||
|
|
||||||
print(f"Testing {len(TEST_CONFIGS)} models")
|
|
||||||
|
def pytest_generate_tests(metafunc):
|
||||||
|
if "test_config" in metafunc.fixturenames:
|
||||||
|
if metafunc.config.getoption("--gaudi-all-models"):
|
||||||
|
models = list(TEST_CONFIGS.keys())
|
||||||
|
else:
|
||||||
|
models = [
|
||||||
|
name
|
||||||
|
for name, config in TEST_CONFIGS.items()
|
||||||
|
if config.get("run_by_default", False)
|
||||||
|
]
|
||||||
|
print(f"Testing {len(models)} models")
|
||||||
|
metafunc.parametrize("test_config", models, indirect=True)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module", params=TEST_CONFIGS.keys())
|
@pytest.fixture(scope="module")
|
||||||
def test_config(request) -> Dict[str, Any]:
|
def test_config(request: SubRequest) -> Dict[str, Any]:
|
||||||
"""Fixture that provides model configurations for testing."""
|
"""Fixture that provides model configurations for testing."""
|
||||||
test_config = TEST_CONFIGS[request.param]
|
model_name = request.param
|
||||||
test_config["test_name"] = request.param
|
test_config = TEST_CONFIGS[model_name]
|
||||||
|
test_config["test_name"] = model_name
|
||||||
return test_config
|
return test_config
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def model_id(test_config):
|
def model_id(test_config: Dict[str, Any]) -> Generator[str, None, None]:
|
||||||
yield test_config["model_id"]
|
yield test_config["model_id"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def test_name(test_config):
|
def test_name(test_config: Dict[str, Any]) -> Generator[str, None, None]:
|
||||||
yield test_config["test_name"]
|
yield test_config["test_name"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def expected_outputs(test_config):
|
def expected_outputs(test_config: Dict[str, Any]) -> Dict[str, str]:
|
||||||
return {
|
return {
|
||||||
"greedy": test_config["expected_greedy_output"],
|
"greedy": test_config["expected_greedy_output"],
|
||||||
# "sampling": model_config["expected_sampling_output"],
|
|
||||||
"batch": test_config["expected_batch_output"],
|
"batch": test_config["expected_batch_output"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def input(test_config):
|
def input(test_config: Dict[str, Any]) -> str:
|
||||||
return test_config["input"]
|
return test_config["input"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def tgi_service(gaudi_launcher, model_id, test_name):
|
def tgi_service(gaudi_launcher, model_id: str, test_name: str):
|
||||||
with gaudi_launcher(model_id, test_name) as tgi_service:
|
with gaudi_launcher(model_id, test_name) as tgi_service:
|
||||||
yield tgi_service
|
yield tgi_service
|
||||||
|
|
||||||
@ -242,8 +250,9 @@ async def tgi_client(tgi_service) -> AsyncClient:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.all_models
|
||||||
async def test_model_single_request(
|
async def test_model_single_request(
|
||||||
tgi_client: AsyncClient, expected_outputs: Dict[str, Any], input: str
|
tgi_client: AsyncClient, expected_outputs: Dict[str, str], input: str
|
||||||
):
|
):
|
||||||
# Bounded greedy decoding without input
|
# Bounded greedy decoding without input
|
||||||
response = await tgi_client.generate(
|
response = await tgi_client.generate(
|
||||||
@ -255,8 +264,12 @@ async def test_model_single_request(
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.all_models
|
||||||
async def test_model_multiple_requests(
|
async def test_model_multiple_requests(
|
||||||
tgi_client, gaudi_generate_load, expected_outputs, input
|
tgi_client: AsyncClient,
|
||||||
|
gaudi_generate_load,
|
||||||
|
expected_outputs: Dict[str, str],
|
||||||
|
input: str,
|
||||||
):
|
):
|
||||||
num_requests = 4
|
num_requests = 4
|
||||||
responses = await gaudi_generate_load(
|
responses = await gaudi_generate_load(
|
||||||
|
Loading…
Reference in New Issue
Block a user