text-generation-inference/integration-tests/neuron/test_implicit_env.py


import os

import pytest


@pytest.fixture(scope="module", params=["hub-neuron", "hub", "local-neuron"])
async def tgi_service(request, neuron_launcher, neuron_model_config):
    """Expose a TGI service corresponding to a model configuration.

    For each model configuration, the service is started using each of the
    following deployment options:
    - from the original hub model (export parameters chosen after a hub lookup),
    - from the pre-exported neuron model on the hub,
    - from a local path to the neuron model.
    """
    # The tgi_env.py script will take care of setting these, so make sure they
    # are not inherited from the test environment.
    for var in [
        "MAX_BATCH_SIZE",
        "MAX_INPUT_TOKENS",
        "MAX_TOTAL_TOKENS",
        "HF_NUM_CORES",
        "HF_AUTO_CAST_TYPE",
    ]:
        if var in os.environ:
            del os.environ[var]
    if request.param == "hub":
        model_name_or_path = neuron_model_config["model_id"]
    elif request.param == "hub-neuron":
        model_name_or_path = neuron_model_config["neuron_model_id"]
    else:
        model_name_or_path = neuron_model_config["neuron_model_path"]
    service_name = neuron_model_config["name"]
    with neuron_launcher(service_name, model_name_or_path) as tgi_service:
        await tgi_service.health(600)
        yield tgi_service


@pytest.mark.asyncio
async def test_model_single_request(tgi_service):
    # Just verify that generation works and that nothing is raised, with
    # several sets of params.
    # No params
    await tgi_service.client.text_generation(
        "What is Deep Learning?",
    )
    # Greedy decoding with a fixed number of new tokens and per-token details
    response = await tgi_service.client.text_generation(
        "How to cook beans ?",
        max_new_tokens=17,
        details=True,
        decoder_input_details=True,
    )
    assert response.details.generated_tokens == 17
    # Sampling
    await tgi_service.client.text_generation(
        "What is Deep Learning?",
        do_sample=True,
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.2,
        max_new_tokens=128,
        seed=42,
    )