mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
feat: prefer disable grammar
This commit is contained in:
parent
f0cdd9c8ea
commit
63c52fb22d
@ -379,12 +379,12 @@ Options:
|
||||
[env: TOKENIZER_CONFIG_PATH=]
|
||||
|
||||
```
|
||||
## GRAMMAR_SUPPORT
|
||||
## DISABLE_GRAMMAR_SUPPORT
|
||||
```shell
|
||||
--grammar-support
|
||||
Enable outlines grammar constrained generation. This is a feature that allows you to generate text that follows a specific grammar
|
||||
--disable-grammar-support
|
||||
Disable outlines grammar constrained generation. This is a feature that allows you to generate text that follows a specific grammar
|
||||
|
||||
[env: GRAMMAR_SUPPORT=]
|
||||
[env: DISABLE_GRAMMAR_SUPPORT=]
|
||||
|
||||
```
|
||||
## ENV
|
||||
|
@ -231,7 +231,7 @@ def launcher(event_loop):
|
||||
quantize: Optional[str] = None,
|
||||
trust_remote_code: bool = False,
|
||||
use_flash_attention: bool = True,
|
||||
grammar_support: bool = False,
|
||||
disable_grammar_support: bool = False,
|
||||
dtype: Optional[str] = None,
|
||||
):
|
||||
port = random.randint(8000, 10_000)
|
||||
@ -255,8 +255,8 @@ def launcher(event_loop):
|
||||
|
||||
env = os.environ
|
||||
|
||||
if grammar_support:
|
||||
args.append("--grammar-support")
|
||||
if disable_grammar_support:
|
||||
args.append("--disable-grammar-support")
|
||||
if num_shard is not None:
|
||||
args.extend(["--num-shard", str(num_shard)])
|
||||
if quantize is not None:
|
||||
@ -305,7 +305,7 @@ def launcher(event_loop):
|
||||
args = ["--model-id", model_id, "--env"]
|
||||
|
||||
if grammar_support:
|
||||
args.append("--grammar-support")
|
||||
args.append("--disable-grammar-support")
|
||||
if num_shard is not None:
|
||||
args.extend(["--num-shard", str(num_shard)])
|
||||
if quantize is not None:
|
||||
|
@ -7,7 +7,7 @@ from text_generation.types import GrammarType
|
||||
@pytest.fixture(scope="module")
|
||||
def flash_llama_grammar_handle(launcher):
|
||||
with launcher(
|
||||
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, grammar_support=True
|
||||
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False
|
||||
) as handle:
|
||||
yield handle
|
||||
|
||||
|
@ -382,10 +382,10 @@ struct Args {
|
||||
#[clap(long, env)]
|
||||
tokenizer_config_path: Option<String>,
|
||||
|
||||
/// Enable outlines grammar constrained generation.
|
||||
/// Disable outlines grammar constrained generation.
|
||||
/// This is a feature that allows you to generate text that follows a specific grammar.
|
||||
#[clap(long, env)]
|
||||
grammar_support: bool,
|
||||
disable_grammar_support: bool,
|
||||
|
||||
/// Display a lot of information about your runtime environment
|
||||
#[clap(long, short, action)]
|
||||
@ -1057,8 +1057,8 @@ fn spawn_webserver(
|
||||
];
|
||||
|
||||
// Grammar support
|
||||
if args.grammar_support {
|
||||
router_args.push("--grammar-support".to_string());
|
||||
if args.disable_grammar_support {
|
||||
router_args.push("--disable-grammar-support".to_string());
|
||||
}
|
||||
|
||||
// Tokenizer config path
|
||||
|
@ -76,7 +76,7 @@ struct Args {
|
||||
#[clap(long, env, default_value_t = false)]
|
||||
messages_api_enabled: bool,
|
||||
#[clap(long, env, default_value_t = false)]
|
||||
grammar_support: bool,
|
||||
disable_grammar_support: bool,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
@ -110,7 +110,7 @@ async fn main() -> Result<(), RouterError> {
|
||||
ngrok_authtoken,
|
||||
ngrok_edge,
|
||||
messages_api_enabled,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
} = args;
|
||||
|
||||
// Launch Tokio runtime
|
||||
@ -362,7 +362,7 @@ async fn main() -> Result<(), RouterError> {
|
||||
ngrok_edge,
|
||||
tokenizer_config,
|
||||
messages_api_enabled,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
|
@ -21,7 +21,7 @@ pub struct Validation {
|
||||
max_top_n_tokens: u32,
|
||||
max_input_length: usize,
|
||||
max_total_tokens: usize,
|
||||
grammar_support: bool,
|
||||
disable_grammar_support: bool,
|
||||
/// Channel to communicate with the background tokenization task
|
||||
sender: Option<mpsc::UnboundedSender<TokenizerRequest>>,
|
||||
}
|
||||
@ -35,7 +35,7 @@ impl Validation {
|
||||
max_top_n_tokens: u32,
|
||||
max_input_length: usize,
|
||||
max_total_tokens: usize,
|
||||
grammar_support: bool,
|
||||
disable_grammar_support: bool,
|
||||
) -> Self {
|
||||
// If we have a fast tokenizer
|
||||
let sender = if let Some(tokenizer) = tokenizer {
|
||||
@ -70,7 +70,7 @@ impl Validation {
|
||||
max_top_n_tokens,
|
||||
max_input_length,
|
||||
max_total_tokens,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
}
|
||||
}
|
||||
|
||||
@ -308,7 +308,7 @@ impl Validation {
|
||||
let (grammar, grammar_type) = match grammar {
|
||||
Some(grammar) => {
|
||||
// Ensure that grammar is not set if it's not supported
|
||||
if !self.grammar_support {
|
||||
if self.disable_grammar_support {
|
||||
return Err(ValidationError::Grammar);
|
||||
}
|
||||
match grammar {
|
||||
@ -502,7 +502,7 @@ mod tests {
|
||||
let max_input_length = 5;
|
||||
let max_total_tokens = 6;
|
||||
let workers = 1;
|
||||
let grammar_support = false;
|
||||
let disable_grammar_support = true;
|
||||
let validation = Validation::new(
|
||||
workers,
|
||||
tokenizer,
|
||||
@ -511,7 +511,7 @@ mod tests {
|
||||
max_top_n_tokens,
|
||||
max_input_length,
|
||||
max_total_tokens,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
);
|
||||
|
||||
let max_new_tokens = 10;
|
||||
@ -532,7 +532,7 @@ mod tests {
|
||||
let max_top_n_tokens = 4;
|
||||
let max_input_length = 5;
|
||||
let max_total_tokens = 6;
|
||||
let grammar_support = false;
|
||||
let disable_grammar_support = true;
|
||||
let workers = 1;
|
||||
let validation = Validation::new(
|
||||
workers,
|
||||
@ -542,7 +542,7 @@ mod tests {
|
||||
max_top_n_tokens,
|
||||
max_input_length,
|
||||
max_total_tokens,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
);
|
||||
|
||||
let max_new_tokens = 10;
|
||||
@ -564,7 +564,7 @@ mod tests {
|
||||
let max_input_length = 5;
|
||||
let max_total_tokens = 6;
|
||||
let workers = 1;
|
||||
let grammar_support = false;
|
||||
let disable_grammar_support = true;
|
||||
let validation = Validation::new(
|
||||
workers,
|
||||
tokenizer,
|
||||
@ -573,7 +573,7 @@ mod tests {
|
||||
max_top_n_tokens,
|
||||
max_input_length,
|
||||
max_total_tokens,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
);
|
||||
match validation
|
||||
.validate(GenerateRequest {
|
||||
@ -600,7 +600,7 @@ mod tests {
|
||||
let max_input_length = 5;
|
||||
let max_total_tokens = 106;
|
||||
let workers = 1;
|
||||
let grammar_support = false;
|
||||
let disable_grammar_support = true;
|
||||
let validation = Validation::new(
|
||||
workers,
|
||||
tokenizer,
|
||||
@ -609,7 +609,7 @@ mod tests {
|
||||
max_top_n_tokens,
|
||||
max_input_length,
|
||||
max_total_tokens,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
);
|
||||
match validation
|
||||
.validate(GenerateRequest {
|
||||
@ -665,7 +665,7 @@ mod tests {
|
||||
let max_input_length = 5;
|
||||
let max_total_tokens = 106;
|
||||
let workers = 1;
|
||||
let grammar_support = false;
|
||||
let disable_grammar_support = true;
|
||||
let validation = Validation::new(
|
||||
workers,
|
||||
tokenizer,
|
||||
@ -674,7 +674,7 @@ mod tests {
|
||||
max_top_n_tokens,
|
||||
max_input_length,
|
||||
max_total_tokens,
|
||||
grammar_support,
|
||||
disable_grammar_support,
|
||||
);
|
||||
match validation
|
||||
.validate(GenerateRequest {
|
||||
|
Loading…
Reference in New Issue
Block a user