Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-21 14:52:20 +00:00).
* wip(gaudi): import server and dockerfile from tgi-gaudi fork * feat(gaudi): new gaudi backend working * fix: fix style * fix prehooks issues * fix(gaudi): refactor server and implement requested changes
24 lines
541 B
Python
24 lines
541 B
Python
import pytest
|
|
import os
|
|
from text_generation_server.pb import generate_pb2
|
|
|
|
# Configure the test server before any model code reads these settings:
# enable prefix caching and select the flashinfer attention backend.
os.environ.update(
    {
        "USE_PREFIX_CACHING": "1",
        "ATTENTION": "flashinfer",
    }
)
|
|
|
|
|
|
@pytest.fixture
def default_pb_parameters():
    """Default greedy-decoding token-chooser parameters shared by the tests.

    Neutral settings throughout: unit temperature, no repetition penalty,
    top-k disabled (0), top-p/typical-p wide open (1.0), and sampling off.
    """
    greedy_defaults = dict(
        temperature=1.0,
        repetition_penalty=1.0,
        top_k=0,
        top_p=1.0,
        typical_p=1.0,
        do_sample=False,
    )
    return generate_pb2.NextTokenChooserParameters(**greedy_defaults)
|
|
|
|
|
|
@pytest.fixture
def default_pb_stop_parameters():
    """Default stopping criteria: no stop sequences, cap at 10 new tokens."""
    return generate_pb2.StoppingCriteriaParameters(
        stop_sequences=[],
        max_new_tokens=10,
    )
|