text-generation-inference/backends/neuron/tests/fixtures/model.py
2025-06-06 15:31:05 +00:00

119 lines
3.7 KiB
Python

import copy
import logging
import subprocess
import sys
from tempfile import TemporaryDirectory
import os
import pytest
from transformers import AutoTokenizer
from optimum.neuron.cache import synchronize_hub_cache
# Emit INFO-and-above records on stdout, prefixed with a timestamp and the
# file/function/line that produced them.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
    stream=sys.stdout,
)
# Name the logger after the module rather than the file path: logger names are
# dot-separated hierarchies, so a path ending in ".py" would be split on its
# dots into meaningless parent loggers. The emitted text is unaffected because
# the format above does not include the logger name.
logger = logging.getLogger(__name__)

# Identifier of the cache repository passed to synchronize_hub_cache and
# published through the CUSTOM_CACHE_REPO environment variable.
OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"
# Export parameters shared by every model configuration declared below.
_COMMON_EXPORT_KWARGS = {
    "batch_size": 4,
    "sequence_length": 4096,
    "num_cores": 2,
    "auto_cast_type": "bf16",
}

# Every entry of this mapping becomes one parameter of the neuron_model_config
# fixture. Each configuration receives its own copy of the export parameters so
# that no two entries share a mutable dictionary.
MODEL_CONFIGURATIONS = {
    config_name: {
        "model_id": hub_model_id,
        "export_kwargs": dict(_COMMON_EXPORT_KWARGS),
    }
    for config_name, hub_model_id in (
        ("llama", "unsloth/Llama-3.2-1B-Instruct"),
        ("qwen2", "Qwen/Qwen2.5-0.5B"),
        ("granite", "ibm-granite/granite-3.1-2b-instruct"),
    )
}
def export_model(model_id, export_kwargs, neuron_model_path):
    """Export a model for neuron by invoking ``optimum-cli export neuron``.

    Args:
        model_id: identifier of the model, passed to the command through ``-m``.
        export_kwargs: mapping of export option names to values; each item is
            forwarded on the command line as ``--<name> <value>``.
        neuron_model_path: output location, passed as the final positional
            argument of the command.

    Raises:
        ValueError: if the export command exits with a non-zero status. The
            underlying ``subprocess.CalledProcessError`` is attached as the
            explicit cause.
    """
    export_command = [
        "optimum-cli",
        "export",
        "neuron",
        "-m",
        model_id,
        "--task",
        "text-generation",
    ]
    for kwarg, value in export_kwargs.items():
        # Values are stringified because subprocess expects string arguments.
        export_command += [f"--{kwarg}", str(value)]
    export_command.append(neuron_model_path)
    logger.info(f"Exporting {model_id} with {export_kwargs}")
    try:
        subprocess.run(export_command, check=True)
    except subprocess.CalledProcessError as e:
        # Chain explicitly so the traceback shows the failed command as the
        # direct cause rather than as an error raised while handling another.
        raise ValueError(f"Failed to export model: {e}") from e
@pytest.fixture(scope="session", params=MODEL_CONFIGURATIONS.keys())
def neuron_model_config(request):
    """Provide an exported neuron model for each declared configuration.

    The model of the requested configuration is exported into a temporary
    directory, the hub cache is synchronized, and the tokenizer is saved next
    to the exported model. The yielded dictionary contains:

    - ``name``: the configuration name,
    - ``model_id``: the original model id,
    - ``export_kwargs``: the export parameters,
    - ``neuron_model_path``: the local path of the exported model.

    The temporary directory is kept until the fixture is torn down and is
    removed afterwards.
    """
    config_name = request.param
    # Work on a private copy so the module-level configurations stay untouched.
    model_config = copy.deepcopy(MODEL_CONFIGURATIONS[config_name])
    model_id = model_config["model_id"]
    with TemporaryDirectory() as export_dir:
        export_model(model_id, model_config["export_kwargs"], export_dir)
        synchronize_hub_cache(cache_repo_id=OPTIMUM_CACHE_REPO_ID)
        # Store the tokenizer files alongside the exported model.
        AutoTokenizer.from_pretrained(model_id).save_pretrained(export_dir)
        # Dynamic entries: the export location, plus the configuration name so
        # that tests can adapt their expectations to the model under test.
        model_config.update(neuron_model_path=export_dir, name=config_name)
        logger.info(f"{config_name} ready for testing ...")
        os.environ["CUSTOM_CACHE_REPO"] = OPTIMUM_CACHE_REPO_ID
        # Yield (rather than return) from inside the ``with`` block: the
        # temporary directory is released only after every test that needs the
        # fixture has completed.
        yield model_config
        logger.info(f"Done with {config_name}")
@pytest.fixture(scope="module")
def neuron_model_path(neuron_model_config):
    """Yield the ``neuron_model_path`` entry of the current model configuration."""
    exported_path = neuron_model_config["neuron_model_path"]
    yield exported_path