feat: prefer disable grammar

This commit is contained in:
drbh 2024-02-14 17:18:04 +00:00
parent f0cdd9c8ea
commit 63c52fb22d
6 changed files with 30 additions and 30 deletions

View File

@ -379,12 +379,12 @@ Options:
[env: TOKENIZER_CONFIG_PATH=]
```
## GRAMMAR_SUPPORT
## DISABLE_GRAMMAR_SUPPORT
```shell
--grammar-support
Enable outlines grammar constrained generation This is a feature that allows you to generate text that follows a specific grammar
--disable-grammar-support
Disable outlines grammar constrained generation. This is a feature that allows you to generate text that follows a specific grammar
[env: GRAMMAR_SUPPORT=]
[env: DISABLE_GRAMMAR_SUPPORT=]
```
## ENV

View File

@ -231,7 +231,7 @@ def launcher(event_loop):
quantize: Optional[str] = None,
trust_remote_code: bool = False,
use_flash_attention: bool = True,
grammar_support: bool = False,
disable_grammar_support: bool = False,
dtype: Optional[str] = None,
):
port = random.randint(8000, 10_000)
@ -255,8 +255,8 @@ def launcher(event_loop):
env = os.environ
if grammar_support:
args.append("--grammar-support")
if disable_grammar_support:
args.append("--disable-grammar-support")
if num_shard is not None:
args.extend(["--num-shard", str(num_shard)])
if quantize is not None:
@ -305,7 +305,7 @@ def launcher(event_loop):
args = ["--model-id", model_id, "--env"]
if grammar_support:
args.append("--grammar-support")
args.append("--disable-grammar-support")
if num_shard is not None:
args.extend(["--num-shard", str(num_shard)])
if quantize is not None:

View File

@ -7,7 +7,7 @@ from text_generation.types import GrammarType
@pytest.fixture(scope="module")
def flash_llama_grammar_handle(launcher):
with launcher(
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, grammar_support=True
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False
) as handle:
yield handle

View File

@ -382,10 +382,10 @@ struct Args {
#[clap(long, env)]
tokenizer_config_path: Option<String>,
/// Enable outlines grammar constrained generation
/// Disable outlines grammar constrained generation.
/// This is a feature that allows you to generate text that follows a specific grammar.
#[clap(long, env)]
grammar_support: bool,
disable_grammar_support: bool,
/// Display a lot of information about your runtime environment
#[clap(long, short, action)]
@ -1057,8 +1057,8 @@ fn spawn_webserver(
];
// Grammar support
if args.grammar_support {
router_args.push("--grammar-support".to_string());
if args.disable_grammar_support {
router_args.push("--disable-grammar-support".to_string());
}
// Tokenizer config path

View File

@ -76,7 +76,7 @@ struct Args {
#[clap(long, env, default_value_t = false)]
messages_api_enabled: bool,
#[clap(long, env, default_value_t = false)]
grammar_support: bool,
disable_grammar_support: bool,
}
#[tokio::main]
@ -110,7 +110,7 @@ async fn main() -> Result<(), RouterError> {
ngrok_authtoken,
ngrok_edge,
messages_api_enabled,
grammar_support,
disable_grammar_support,
} = args;
// Launch Tokio runtime
@ -362,7 +362,7 @@ async fn main() -> Result<(), RouterError> {
ngrok_edge,
tokenizer_config,
messages_api_enabled,
grammar_support,
disable_grammar_support,
)
.await?;
Ok(())

View File

@ -21,7 +21,7 @@ pub struct Validation {
max_top_n_tokens: u32,
max_input_length: usize,
max_total_tokens: usize,
grammar_support: bool,
disable_grammar_support: bool,
/// Channel to communicate with the background tokenization task
sender: Option<mpsc::UnboundedSender<TokenizerRequest>>,
}
@ -35,7 +35,7 @@ impl Validation {
max_top_n_tokens: u32,
max_input_length: usize,
max_total_tokens: usize,
grammar_support: bool,
disable_grammar_support: bool,
) -> Self {
// If we have a fast tokenizer
let sender = if let Some(tokenizer) = tokenizer {
@ -70,7 +70,7 @@ impl Validation {
max_top_n_tokens,
max_input_length,
max_total_tokens,
grammar_support,
disable_grammar_support,
}
}
@ -308,7 +308,7 @@ impl Validation {
let (grammar, grammar_type) = match grammar {
Some(grammar) => {
// Ensure that grammar is not set if it's not supported
if !self.grammar_support {
if self.disable_grammar_support {
return Err(ValidationError::Grammar);
}
match grammar {
@ -502,7 +502,7 @@ mod tests {
let max_input_length = 5;
let max_total_tokens = 6;
let workers = 1;
let grammar_support = false;
let disable_grammar_support = true;
let validation = Validation::new(
workers,
tokenizer,
@ -511,7 +511,7 @@ mod tests {
max_top_n_tokens,
max_input_length,
max_total_tokens,
grammar_support,
disable_grammar_support,
);
let max_new_tokens = 10;
@ -532,7 +532,7 @@ mod tests {
let max_top_n_tokens = 4;
let max_input_length = 5;
let max_total_tokens = 6;
let grammar_support = false;
let disable_grammar_support = true;
let workers = 1;
let validation = Validation::new(
workers,
@ -542,7 +542,7 @@ mod tests {
max_top_n_tokens,
max_input_length,
max_total_tokens,
grammar_support,
disable_grammar_support,
);
let max_new_tokens = 10;
@ -564,7 +564,7 @@ mod tests {
let max_input_length = 5;
let max_total_tokens = 6;
let workers = 1;
let grammar_support = false;
let disable_grammar_support = true;
let validation = Validation::new(
workers,
tokenizer,
@ -573,7 +573,7 @@ mod tests {
max_top_n_tokens,
max_input_length,
max_total_tokens,
grammar_support,
disable_grammar_support,
);
match validation
.validate(GenerateRequest {
@ -600,7 +600,7 @@ mod tests {
let max_input_length = 5;
let max_total_tokens = 106;
let workers = 1;
let grammar_support = false;
let disable_grammar_support = true;
let validation = Validation::new(
workers,
tokenizer,
@ -609,7 +609,7 @@ mod tests {
max_top_n_tokens,
max_input_length,
max_total_tokens,
grammar_support,
disable_grammar_support,
);
match validation
.validate(GenerateRequest {
@ -665,7 +665,7 @@ mod tests {
let max_input_length = 5;
let max_total_tokens = 106;
let workers = 1;
let grammar_support = false;
let disable_grammar_support = true;
let validation = Validation::new(
workers,
tokenizer,
@ -674,7 +674,7 @@ mod tests {
max_top_n_tokens,
max_input_length,
max_total_tokens,
grammar_support,
disable_grammar_support,
);
match validation
.validate(GenerateRequest {