mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-04-23 07:52:06 +00:00

test(neuron): merge integration tests and fixtures

commit a3dcdab706 (parent 68e1c608f6)
@@ -33,7 +33,3 @@ install_server:
 test_server: install_server
 	python -m pip install -r ${mkfile_dir}/tests/requirements.txt
 	python -m pytest -sv ${mkfile_dir}/tests/server
-
-test_integration: image
-	python -m pip install -r ${mkfile_dir}/tests/requirements.txt
-	python -m pytest -sv ${mkfile_dir}/tests/integration
@@ -1 +1 @@
-pytest_plugins = ["fixtures.service", "fixtures.model"]
+pytest_plugins = ["fixtures.model"]
@ -1,3 +1,4 @@
|
|||||||
|
pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.model"]
|
||||||
# ruff: noqa: E402
|
# ruff: noqa: E402
|
||||||
from _pytest.fixtures import SubRequest
|
from _pytest.fixtures import SubRequest
|
||||||
import requests
|
import requests
|
||||||
|
integration-tests/fixtures/neuron/model.py (new file, 129 lines)
@@ -0,0 +1,129 @@
+import copy
+import logging
+import subprocess
+import sys
+from tempfile import TemporaryDirectory
+
+import huggingface_hub
+import pytest
+from transformers import AutoTokenizer
+
+from optimum.neuron import NeuronModelForCausalLM
+from optimum.neuron.utils import synchronize_hub_cache
+from optimum.neuron.version import __sdk_version__ as sdk_version
+from optimum.neuron.version import __version__ as version
+
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__file__)
+
+OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"
+
+# All model configurations below will be added to the neuron_model_config fixture
+MODEL_CONFIGURATIONS = {
+    "gpt2": {
+        "model_id": "gpt2",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
+    },
+    "llama": {
+        "model_id": "NousResearch/Hermes-2-Theta-Llama-3-8B",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 2048, "num_cores": 2, "auto_cast_type": "fp16"},
+    },
+    "mistral": {
+        "model_id": "optimum/mistral-1.1b-testing",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
+    },
+    "qwen2": {
+        "model_id": "Qwen/Qwen2.5-0.5B",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "fp16"},
+    },
+    "granite": {
+        "model_id": "ibm-granite/granite-3.1-2b-instruct",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
+    },
+}
+
+
+def get_hub_neuron_model_id(config_name: str):
+    return f"optimum-internal-testing/neuron-testing-{version}-{sdk_version}-{config_name}"
+
+
+def export_model(model_id, export_kwargs, neuron_model_path):
+    export_command = ["optimum-cli", "export", "neuron", "-m", model_id, "--task", "text-generation"]
+    for kwarg, value in export_kwargs.items():
+        export_command.append(f"--{kwarg}")
+        export_command.append(str(value))
+    export_command.append(neuron_model_path)
+    logger.info(f"Exporting {model_id} with {export_kwargs}")
+    try:
+        subprocess.run(export_command, check=True)
+    except subprocess.CalledProcessError as e:
+        raise ValueError(f"Failed to export model: {e}")
+
+
+@pytest.fixture(scope="session", params=MODEL_CONFIGURATIONS.keys())
+def neuron_model_config(request):
+    """Expose a pre-trained neuron model
+
+    The fixture first makes sure the following model artifacts are present on the hub:
+    - exported neuron model under optimum-internal-testing/neuron-testing-<version>-<name>,
+    - cached artifacts under optimum-internal-testing/neuron-testing-cache.
+    If not, it will export the model and push it to the hub.
+
+    It then fetches the model locally and return a dictionary containing:
+    - a configuration name,
+    - the original model id,
+    - the export parameters,
+    - the neuron model id,
+    - the neuron model local path.
+
+    For each exposed model, the local directory is maintained for the duration of the
+    test session and cleaned up afterwards.
+    The hub model artifacts are never cleaned up and persist accross sessions.
+    They must be cleaned up manually when the optimum-neuron version changes.
+
+    """
+    config_name = request.param
+    model_config = copy.deepcopy(MODEL_CONFIGURATIONS[request.param])
+    model_id = model_config["model_id"]
+    export_kwargs = model_config["export_kwargs"]
+    neuron_model_id = get_hub_neuron_model_id(config_name)
+    with TemporaryDirectory() as neuron_model_path:
+        hub = huggingface_hub.HfApi()
+        if hub.repo_exists(neuron_model_id):
+            logger.info(f"Fetching {neuron_model_id} from the HuggingFace hub")
+            hub.snapshot_download(neuron_model_id, local_dir=neuron_model_path)
+        else:
+            export_model(model_id, export_kwargs, neuron_model_path)
+            tokenizer = AutoTokenizer.from_pretrained(model_id)
+            tokenizer.save_pretrained(neuron_model_path)
+            del tokenizer
+            # Create the test model on the hub
+            hub.create_repo(neuron_model_id, private=True)
+            hub.upload_folder(
+                folder_path=neuron_model_path,
+                repo_id=neuron_model_id,
+                ignore_patterns=[NeuronModelForCausalLM.CHECKPOINT_DIR + "/*"],
+            )
+            # Make sure it is cached
+            synchronize_hub_cache(cache_repo_id=OPTIMUM_CACHE_REPO_ID)
+        # Add dynamic parameters to the model configuration
+        model_config["neuron_model_path"] = neuron_model_path
+        model_config["neuron_model_id"] = neuron_model_id
+        # Also add model configuration name to allow tests to adapt their expectations
+        model_config["name"] = config_name
+        # Yield instead of returning to keep a reference to the temporary directory.
+        # It will go out of scope and be released only once all tests needing the fixture
+        # have been completed.
+        logger.info(f"{config_name} ready for testing ...")
+        yield model_config
+    logger.info(f"Done with {config_name}")
+
+
+@pytest.fixture(scope="module")
+def neuron_model_path(neuron_model_config):
+    yield neuron_model_config["neuron_model_path"]
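For orientation, a minimal sketch of how a test module could consume these fixtures; the test names and assertions are illustrative, only the fixture names and the dictionary keys come from the file above:

    def test_neuron_model_config_contents(neuron_model_config):
        # The session-scoped fixture yields one configuration per MODEL_CONFIGURATIONS entry.
        for key in ("name", "model_id", "export_kwargs", "neuron_model_id", "neuron_model_path"):
            assert key in neuron_model_config


    def test_neuron_model_path_is_local(neuron_model_path):
        import os

        # The module-scoped helper simply re-exposes the local export directory.
        assert os.path.isdir(neuron_model_path)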
@@ -18,9 +18,20 @@ from huggingface_hub import AsyncInferenceClient, TextGenerationOutput
 
 OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"
-DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "text-generation-inference:latest-neuron")
 HF_TOKEN = huggingface_hub.get_token()
 
 
+def get_tgi_docker_image():
+    docker_image = os.getenv("DOCKER_IMAGE", None)
+    if docker_image is None:
+        client = docker.from_env()
+        images = client.images.list(filters={"reference": "text-generation-inference"})
+        if not images:
+            raise ValueError("No text-generation-inference image found on this host to run tests.")
+        docker_image = images[0].tags[0]
+    return docker_image
+
+
 logging.basicConfig(
     level=logging.INFO,
     format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
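In other words, the Docker image is now resolved lazily. A short sketch of the two ways a test run can select it (the environment variable name comes from the hunk above; the tag value is illustrative):

    import os

    # Option 1: pin the image explicitly before launching the tests.
    os.environ["DOCKER_IMAGE"] = "text-generation-inference:latest-neuron"  # illustrative tag

    # Option 2: leave DOCKER_IMAGE unset and let get_tgi_docker_image() fall back to the
    # first local image whose reference matches "text-generation-inference".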
@@ -83,7 +94,7 @@ def event_loop():
 
 
 @pytest.fixture(scope="module")
-def launcher(event_loop):
+def neuron_launcher(event_loop):
     """Utility fixture to expose a TGI service.
 
     The fixture uses a single event loop for each module, but it can create multiple
@@ -130,15 +141,16 @@ def launcher(event_loop):
             if var in os.environ:
                 env[var] = os.environ[var]
 
+        base_image = get_tgi_docker_image()
         if os.path.isdir(model_name_or_path):
             # Create a sub-image containing the model to workaround docker dind issues preventing
             # to share a volume from the container running tests
-            docker_tag = f"{container_name}-img"
+            test_image = f"{container_name}-img"
             logger.info(
                 "Building image on the flight derivated from %s, tagged with %s",
-                DOCKER_IMAGE,
-                docker_tag,
+                base_image,
+                test_image,
             )
             with tempfile.TemporaryDirectory() as context_dir:
                 # Copy model directory to build context
@@ -147,17 +159,17 @@ def launcher(event_loop):
                 # Create Dockerfile
                 container_model_id = f"/data/{model_name_or_path}"
                 docker_content = f"""
-                FROM {DOCKER_IMAGE}
+                FROM {base_image}
                 COPY model {container_model_id}
                 """
                 with open(os.path.join(context_dir, "Dockerfile"), "wb") as f:
                     f.write(docker_content.encode("utf-8"))
                     f.flush()
-                image, logs = client.images.build(path=context_dir, dockerfile=f.name, tag=docker_tag)
+                image, logs = client.images.build(path=context_dir, dockerfile=f.name, tag=test_image)
                 logger.info("Successfully built image %s", image.id)
                 logger.debug("Build logs %s", logs)
         else:
-            docker_tag = DOCKER_IMAGE
+            test_image = base_image
             image = None
             container_model_id = model_name_or_path
 
@@ -167,7 +179,7 @@ def launcher(event_loop):
             args.append("--trust-remote-code")
 
         container = client.containers.run(
-            docker_tag,
+            test_image,
             command=args,
             name=container_name,
             environment=env,
@@ -210,7 +222,7 @@ def launcher(event_loop):
 
 
 @pytest.fixture(scope="module")
-def generate_load():
+def neuron_generate_load():
     """A utility fixture to launch multiple asynchronous TGI requests in parallel
 
     Args:
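The body of neuron_generate_load is not part of this hunk. As a rough idea of what a parallel-load helper of this shape typically does, here is a hedged sketch; only the fixture's purpose and the AsyncInferenceClient import are taken from the diff, the helper name and signature below are illustrative:

    import asyncio

    from huggingface_hub import AsyncInferenceClient


    async def _generate_load_sketch(client: AsyncInferenceClient, prompt: str, max_new_tokens: int, n_requests: int):
        # Fire n_requests identical generation requests concurrently and collect the outputs.
        futures = [
            client.text_generation(prompt, max_new_tokens=max_new_tokens, details=True)
            for _ in range(n_requests)
        ]
        return await asyncio.gather(*futures)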
@@ -3,10 +3,10 @@ import pytest
 
 
 @pytest.fixture
-async def tgi_service(launcher, neuron_model_config):
+async def tgi_service(neuron_launcher, neuron_model_config):
     model_name_or_path = neuron_model_config["neuron_model_path"]
     service_name = neuron_model_config["name"]
-    with launcher(service_name, model_name_or_path) as tgi_service:
+    with neuron_launcher(service_name, model_name_or_path) as tgi_service:
         await tgi_service.health(600)
         yield tgi_service
 
@@ -71,9 +71,9 @@ async def test_model_single_request(tgi_service):
 
 
 @pytest.mark.asyncio
-async def test_model_multiple_requests(tgi_service, generate_load):
+async def test_model_multiple_requests(tgi_service, neuron_generate_load):
     num_requests = 4
-    responses = await generate_load(
+    responses = await neuron_generate_load(
         tgi_service.client,
         "What is Deep Learning?",
         max_new_tokens=17,
@@ -5,7 +5,7 @@ from huggingface_hub.errors import ValidationError
 
 
 @pytest.fixture(scope="module", params=["hub-neuron", "hub", "local-neuron"])
-async def tgi_service(request, launcher, neuron_model_config):
+async def tgi_service(request, neuron_launcher, neuron_model_config):
     """Expose a TGI service corresponding to a model configuration
 
     For each model configuration, the service will be started using the following
@@ -31,7 +31,7 @@ async def tgi_service(request, launcher, neuron_model_config):
     else:
         model_name_or_path = neuron_model_config["neuron_model_path"]
     service_name = neuron_model_config["name"]
-    with launcher(service_name, model_name_or_path) as tgi_service:
+    with neuron_launcher(service_name, model_name_or_path) as tgi_service:
         await tgi_service.health(600)
         yield tgi_service
 
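Taken together, a new test against the merged fixtures follows this shape; a minimal sketch assuming the renamed fixtures above, with an illustrative test name, prompt, and token budget:

    import pytest


    @pytest.mark.asyncio
    async def test_model_generates_text(tgi_service):
        # tgi_service is built by the fixture above: neuron_launcher starts the container
        # and the fixture waits for it to become healthy before yielding.
        text = await tgi_service.client.text_generation("What is Deep Learning?", max_new_tokens=17)
        assert isinstance(text, str) and len(text) > 0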