text-generation-inference/server/tests/conftest.py
Nicolas Patry 8d01848370
Update server tests
- Default to throughput test in k6
- Use TGI_WIGGLE_ROOM to adjust wiggle room
2024-08-28 15:42:05 +02:00

24 lines
541 B
Python

import pytest
import os
from text_generation_server.pb import generate_pb2
os.environ["USE_PREFIX_CACHING"] = "1"
os.environ["ATTENTION"] = "flashinfer"
@pytest.fixture
def default_pb_parameters():
return generate_pb2.NextTokenChooserParameters(
temperature=1.0,
repetition_penalty=1.0,
top_k=0,
top_p=1.0,
typical_p=1.0,
do_sample=False,
)
@pytest.fixture
def default_pb_stop_parameters():
return generate_pb2.StoppingCriteriaParameters(stop_sequences=[], max_new_tokens=10)