diff --git a/docs/source/basic_tutorials/launcher.md b/docs/source/basic_tutorials/launcher.md index 96ac82c1..36fa1241 100644 --- a/docs/source/basic_tutorials/launcher.md +++ b/docs/source/basic_tutorials/launcher.md @@ -379,12 +379,12 @@ Options: [env: TOKENIZER_CONFIG_PATH=] ``` -## GRAMMAR_SUPPORT +## DISABLE_GRAMMAR_SUPPORT ```shell - --grammar-support - Enable outlines grammar constrained generation This is a feature that allows you to generate text that follows a specific grammar + --disable-grammar-support + Disable outlines grammar constrained generation. This is a feature that allows you to generate text that follows a specific grammar - [env: GRAMMAR_SUPPORT=] + [env: DISABLE_GRAMMAR_SUPPORT=] ``` ## ENV diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index c579fff5..5bc82e4d 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -231,7 +231,7 @@ def launcher(event_loop): quantize: Optional[str] = None, trust_remote_code: bool = False, use_flash_attention: bool = True, - grammar_support: bool = False, + disable_grammar_support: bool = False, dtype: Optional[str] = None, ): port = random.randint(8000, 10_000) @@ -255,8 +255,8 @@ def launcher(event_loop): env = os.environ - if grammar_support: - args.append("--grammar-support") + if disable_grammar_support: + args.append("--disable-grammar-support") if num_shard is not None: args.extend(["--num-shard", str(num_shard)]) if quantize is not None: @@ -305,7 +305,7 @@ def launcher(event_loop): args = ["--model-id", model_id, "--env"] if grammar_support: - args.append("--grammar-support") + args.append("--disable-grammar-support") if num_shard is not None: args.extend(["--num-shard", str(num_shard)]) if quantize is not None: diff --git a/integration-tests/models/test_grammar_llama.py b/integration-tests/models/test_grammar_llama.py index 3abe1077..f068496c 100644 --- a/integration-tests/models/test_grammar_llama.py +++ b/integration-tests/models/test_grammar_llama.py @@ -7,7 +7,7 @@ from text_generation.types import GrammarType @pytest.fixture(scope="module") def flash_llama_grammar_handle(launcher): with launcher( - "TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, grammar_support=True + "TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False ) as handle: yield handle diff --git a/launcher/src/main.rs b/launcher/src/main.rs index de2b3f64..d52e2669 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -382,10 +382,10 @@ struct Args { #[clap(long, env)] tokenizer_config_path: Option, - /// Enable outlines grammar constrained generation + /// Disable outlines grammar constrained generation. /// This is a feature that allows you to generate text that follows a specific grammar. #[clap(long, env)] - grammar_support: bool, + disable_grammar_support: bool, /// Display a lot of information about your runtime environment #[clap(long, short, action)] @@ -1057,8 +1057,8 @@ fn spawn_webserver( ]; // Grammar support - if args.grammar_support { - router_args.push("--grammar-support".to_string()); + if args.disable_grammar_support { + router_args.push("--disable-grammar-support".to_string()); } // Tokenizer config path diff --git a/router/src/main.rs b/router/src/main.rs index 6bd86d58..457bca8e 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -76,7 +76,7 @@ struct Args { #[clap(long, env, default_value_t = false)] messages_api_enabled: bool, #[clap(long, env, default_value_t = false)] - grammar_support: bool, + disable_grammar_support: bool, } #[tokio::main] @@ -110,7 +110,7 @@ async fn main() -> Result<(), RouterError> { ngrok_authtoken, ngrok_edge, messages_api_enabled, - grammar_support, + disable_grammar_support, } = args; // Launch Tokio runtime @@ -362,7 +362,7 @@ async fn main() -> Result<(), RouterError> { ngrok_edge, tokenizer_config, messages_api_enabled, - grammar_support, + disable_grammar_support, ) .await?; Ok(()) diff --git a/router/src/validation.rs b/router/src/validation.rs index dc79c61a..7801f4e3 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -21,7 +21,7 @@ pub struct Validation { max_top_n_tokens: u32, max_input_length: usize, max_total_tokens: usize, - grammar_support: bool, + disable_grammar_support: bool, /// Channel to communicate with the background tokenization task sender: Option>, } @@ -35,7 +35,7 @@ impl Validation { max_top_n_tokens: u32, max_input_length: usize, max_total_tokens: usize, - grammar_support: bool, + disable_grammar_support: bool, ) -> Self { // If we have a fast tokenizer let sender = if let Some(tokenizer) = tokenizer { @@ -70,7 +70,7 @@ impl Validation { max_top_n_tokens, max_input_length, max_total_tokens, - grammar_support, + disable_grammar_support, } } @@ -308,7 +308,7 @@ impl Validation { let (grammar, grammar_type) = match grammar { Some(grammar) => { // Ensure that grammar is not set if it's not supported - if !self.grammar_support { + if self.disable_grammar_support { return Err(ValidationError::Grammar); } match grammar { @@ -502,7 +502,7 @@ mod tests { let max_input_length = 5; let max_total_tokens = 6; let workers = 1; - let grammar_support = false; + let disable_grammar_support = true; let validation = Validation::new( workers, tokenizer, @@ -511,7 +511,7 @@ mod tests { max_top_n_tokens, max_input_length, max_total_tokens, - grammar_support, + disable_grammar_support, ); let max_new_tokens = 10; @@ -532,7 +532,7 @@ mod tests { let max_top_n_tokens = 4; let max_input_length = 5; let max_total_tokens = 6; - let grammar_support = false; + let disable_grammar_support = true; let workers = 1; let validation = Validation::new( workers, @@ -542,7 +542,7 @@ mod tests { max_top_n_tokens, max_input_length, max_total_tokens, - grammar_support, + disable_grammar_support, ); let max_new_tokens = 10; @@ -564,7 +564,7 @@ mod tests { let max_input_length = 5; let max_total_tokens = 6; let workers = 1; - let grammar_support = false; + let disable_grammar_support = true; let validation = Validation::new( workers, tokenizer, @@ -573,7 +573,7 @@ mod tests { max_top_n_tokens, max_input_length, max_total_tokens, - grammar_support, + disable_grammar_support, ); match validation .validate(GenerateRequest { @@ -600,7 +600,7 @@ mod tests { let max_input_length = 5; let max_total_tokens = 106; let workers = 1; - let grammar_support = false; + let disable_grammar_support = true; let validation = Validation::new( workers, tokenizer, @@ -609,7 +609,7 @@ mod tests { max_top_n_tokens, max_input_length, max_total_tokens, - grammar_support, + disable_grammar_support, ); match validation .validate(GenerateRequest { @@ -665,7 +665,7 @@ mod tests { let max_input_length = 5; let max_total_tokens = 106; let workers = 1; - let grammar_support = false; + let disable_grammar_support = true; let validation = Validation::new( workers, tokenizer, @@ -674,7 +674,7 @@ mod tests { max_top_n_tokens, max_input_length, max_total_tokens, - grammar_support, + disable_grammar_support, ); match validation .validate(GenerateRequest {