mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
feat: add grammar_support to test launcher
This commit is contained in:
parent
95e577b971
commit
5ba1baccb0
@@ -224,6 +224,7 @@ def launcher(event_loop):
|
|||||||
quantize: Optional[str] = None,
|
quantize: Optional[str] = None,
|
||||||
trust_remote_code: bool = False,
|
trust_remote_code: bool = False,
|
||||||
use_flash_attention: bool = True,
|
use_flash_attention: bool = True,
|
||||||
|
grammar_support: bool = False,
|
||||||
dtype: Optional[str] = None,
|
dtype: Optional[str] = None,
|
||||||
):
|
):
|
||||||
port = random.randint(8000, 10_000)
|
port = random.randint(8000, 10_000)
|
||||||
@@ -247,6 +248,8 @@ def launcher(event_loop):
|
|||||||
|
|
||||||
env = os.environ
|
env = os.environ
|
||||||
|
|
||||||
|
if grammar_support:
|
||||||
|
args.append("--grammar-support")
|
||||||
if num_shard is not None:
|
if num_shard is not None:
|
||||||
args.extend(["--num-shard", str(num_shard)])
|
args.extend(["--num-shard", str(num_shard)])
|
||||||
if quantize is not None:
|
if quantize is not None:
|
||||||
|
@@ -4,7 +4,7 @@ import json
|
|||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def flash_llama_grammar_handle(launcher):
|
def flash_llama_grammar_handle(launcher):
|
||||||
with launcher("TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2) as handle:
|
with launcher("TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, grammar_support=True) as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user