mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
feat: prefer disable grammar
This commit is contained in:
parent
f0cdd9c8ea
commit
63c52fb22d
@ -379,12 +379,12 @@ Options:
|
|||||||
[env: TOKENIZER_CONFIG_PATH=]
|
[env: TOKENIZER_CONFIG_PATH=]
|
||||||
|
|
||||||
```
|
```
|
||||||
## GRAMMAR_SUPPORT
|
## DISABLE_GRAMMAR_SUPPORT
|
||||||
```shell
|
```shell
|
||||||
--grammar-support
|
--disable-grammar-support
|
||||||
Enable outlines grammar constrained generation This is a feature that allows you to generate text that follows a specific grammar
|
Disable outlines grammar constrained generation. Grammar constrained generation is a feature that allows you to generate text that follows a specific grammar
|
||||||
|
|
||||||
[env: GRAMMAR_SUPPORT=]
|
[env: DISABLE_GRAMMAR_SUPPORT=]
|
||||||
|
|
||||||
```
|
```
|
||||||
## ENV
|
## ENV
|
||||||
|
@ -231,7 +231,7 @@ def launcher(event_loop):
|
|||||||
quantize: Optional[str] = None,
|
quantize: Optional[str] = None,
|
||||||
trust_remote_code: bool = False,
|
trust_remote_code: bool = False,
|
||||||
use_flash_attention: bool = True,
|
use_flash_attention: bool = True,
|
||||||
grammar_support: bool = False,
|
disable_grammar_support: bool = False,
|
||||||
dtype: Optional[str] = None,
|
dtype: Optional[str] = None,
|
||||||
):
|
):
|
||||||
port = random.randint(8000, 10_000)
|
port = random.randint(8000, 10_000)
|
||||||
@ -255,8 +255,8 @@ def launcher(event_loop):
|
|||||||
|
|
||||||
env = os.environ
|
env = os.environ
|
||||||
|
|
||||||
if grammar_support:
|
if disable_grammar_support:
|
||||||
args.append("--grammar-support")
|
args.append("--disable-grammar-support")
|
||||||
if num_shard is not None:
|
if num_shard is not None:
|
||||||
args.extend(["--num-shard", str(num_shard)])
|
args.extend(["--num-shard", str(num_shard)])
|
||||||
if quantize is not None:
|
if quantize is not None:
|
||||||
@ -305,7 +305,7 @@ def launcher(event_loop):
|
|||||||
args = ["--model-id", model_id, "--env"]
|
args = ["--model-id", model_id, "--env"]
|
||||||
|
|
||||||
if grammar_support:
|
if grammar_support:
|
||||||
args.append("--grammar-support")
|
args.append("--disable-grammar-support")
|
||||||
if num_shard is not None:
|
if num_shard is not None:
|
||||||
args.extend(["--num-shard", str(num_shard)])
|
args.extend(["--num-shard", str(num_shard)])
|
||||||
if quantize is not None:
|
if quantize is not None:
|
||||||
|
@ -7,7 +7,7 @@ from text_generation.types import GrammarType
|
|||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def flash_llama_grammar_handle(launcher):
|
def flash_llama_grammar_handle(launcher):
|
||||||
with launcher(
|
with launcher(
|
||||||
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, grammar_support=True
|
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False
|
||||||
) as handle:
|
) as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
@ -382,10 +382,10 @@ struct Args {
|
|||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
tokenizer_config_path: Option<String>,
|
tokenizer_config_path: Option<String>,
|
||||||
|
|
||||||
/// Enable outlines grammar constrained generation
|
/// Disable outlines grammar constrained generation.
|
||||||
/// This is a feature that allows you to generate text that follows a specific grammar.
|
/// Grammar constrained generation is a feature that allows you to generate text that follows a specific grammar.
|
||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
|
|
||||||
/// Display a lot of information about your runtime environment
|
/// Display a lot of information about your runtime environment
|
||||||
#[clap(long, short, action)]
|
#[clap(long, short, action)]
|
||||||
@ -1057,8 +1057,8 @@ fn spawn_webserver(
|
|||||||
];
|
];
|
||||||
|
|
||||||
// Grammar support
|
// Grammar support
|
||||||
if args.grammar_support {
|
if args.disable_grammar_support {
|
||||||
router_args.push("--grammar-support".to_string());
|
router_args.push("--disable-grammar-support".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tokenizer config path
|
// Tokenizer config path
|
||||||
|
@ -76,7 +76,7 @@ struct Args {
|
|||||||
#[clap(long, env, default_value_t = false)]
|
#[clap(long, env, default_value_t = false)]
|
||||||
messages_api_enabled: bool,
|
messages_api_enabled: bool,
|
||||||
#[clap(long, env, default_value_t = false)]
|
#[clap(long, env, default_value_t = false)]
|
||||||
grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
@ -110,7 +110,7 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
ngrok_authtoken,
|
ngrok_authtoken,
|
||||||
ngrok_edge,
|
ngrok_edge,
|
||||||
messages_api_enabled,
|
messages_api_enabled,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
} = args;
|
} = args;
|
||||||
|
|
||||||
// Launch Tokio runtime
|
// Launch Tokio runtime
|
||||||
@ -362,7 +362,7 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
ngrok_edge,
|
ngrok_edge,
|
||||||
tokenizer_config,
|
tokenizer_config,
|
||||||
messages_api_enabled,
|
messages_api_enabled,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -21,7 +21,7 @@ pub struct Validation {
|
|||||||
max_top_n_tokens: u32,
|
max_top_n_tokens: u32,
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
/// Channel to communicate with the background tokenization task
|
/// Channel to communicate with the background tokenization task
|
||||||
sender: Option<mpsc::UnboundedSender<TokenizerRequest>>,
|
sender: Option<mpsc::UnboundedSender<TokenizerRequest>>,
|
||||||
}
|
}
|
||||||
@ -35,7 +35,7 @@ impl Validation {
|
|||||||
max_top_n_tokens: u32,
|
max_top_n_tokens: u32,
|
||||||
max_input_length: usize,
|
max_input_length: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
// If we have a fast tokenizer
|
// If we have a fast tokenizer
|
||||||
let sender = if let Some(tokenizer) = tokenizer {
|
let sender = if let Some(tokenizer) = tokenizer {
|
||||||
@ -70,7 +70,7 @@ impl Validation {
|
|||||||
max_top_n_tokens,
|
max_top_n_tokens,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,7 +308,7 @@ impl Validation {
|
|||||||
let (grammar, grammar_type) = match grammar {
|
let (grammar, grammar_type) = match grammar {
|
||||||
Some(grammar) => {
|
Some(grammar) => {
|
||||||
// Ensure that grammar is not set if it's not supported
|
// Reject the request if a grammar is set while grammar support is disabled
|
||||||
if !self.grammar_support {
|
if self.disable_grammar_support {
|
||||||
return Err(ValidationError::Grammar);
|
return Err(ValidationError::Grammar);
|
||||||
}
|
}
|
||||||
match grammar {
|
match grammar {
|
||||||
@ -502,7 +502,7 @@ mod tests {
|
|||||||
let max_input_length = 5;
|
let max_input_length = 5;
|
||||||
let max_total_tokens = 6;
|
let max_total_tokens = 6;
|
||||||
let workers = 1;
|
let workers = 1;
|
||||||
let grammar_support = false;
|
let disable_grammar_support = true;
|
||||||
let validation = Validation::new(
|
let validation = Validation::new(
|
||||||
workers,
|
workers,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
@ -511,7 +511,7 @@ mod tests {
|
|||||||
max_top_n_tokens,
|
max_top_n_tokens,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
);
|
);
|
||||||
|
|
||||||
let max_new_tokens = 10;
|
let max_new_tokens = 10;
|
||||||
@ -532,7 +532,7 @@ mod tests {
|
|||||||
let max_top_n_tokens = 4;
|
let max_top_n_tokens = 4;
|
||||||
let max_input_length = 5;
|
let max_input_length = 5;
|
||||||
let max_total_tokens = 6;
|
let max_total_tokens = 6;
|
||||||
let grammar_support = false;
|
let disable_grammar_support = true;
|
||||||
let workers = 1;
|
let workers = 1;
|
||||||
let validation = Validation::new(
|
let validation = Validation::new(
|
||||||
workers,
|
workers,
|
||||||
@ -542,7 +542,7 @@ mod tests {
|
|||||||
max_top_n_tokens,
|
max_top_n_tokens,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
);
|
);
|
||||||
|
|
||||||
let max_new_tokens = 10;
|
let max_new_tokens = 10;
|
||||||
@ -564,7 +564,7 @@ mod tests {
|
|||||||
let max_input_length = 5;
|
let max_input_length = 5;
|
||||||
let max_total_tokens = 6;
|
let max_total_tokens = 6;
|
||||||
let workers = 1;
|
let workers = 1;
|
||||||
let grammar_support = false;
|
let disable_grammar_support = true;
|
||||||
let validation = Validation::new(
|
let validation = Validation::new(
|
||||||
workers,
|
workers,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
@ -573,7 +573,7 @@ mod tests {
|
|||||||
max_top_n_tokens,
|
max_top_n_tokens,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
);
|
);
|
||||||
match validation
|
match validation
|
||||||
.validate(GenerateRequest {
|
.validate(GenerateRequest {
|
||||||
@ -600,7 +600,7 @@ mod tests {
|
|||||||
let max_input_length = 5;
|
let max_input_length = 5;
|
||||||
let max_total_tokens = 106;
|
let max_total_tokens = 106;
|
||||||
let workers = 1;
|
let workers = 1;
|
||||||
let grammar_support = false;
|
let disable_grammar_support = true;
|
||||||
let validation = Validation::new(
|
let validation = Validation::new(
|
||||||
workers,
|
workers,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
@ -609,7 +609,7 @@ mod tests {
|
|||||||
max_top_n_tokens,
|
max_top_n_tokens,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
);
|
);
|
||||||
match validation
|
match validation
|
||||||
.validate(GenerateRequest {
|
.validate(GenerateRequest {
|
||||||
@ -665,7 +665,7 @@ mod tests {
|
|||||||
let max_input_length = 5;
|
let max_input_length = 5;
|
||||||
let max_total_tokens = 106;
|
let max_total_tokens = 106;
|
||||||
let workers = 1;
|
let workers = 1;
|
||||||
let grammar_support = false;
|
let disable_grammar_support = true;
|
||||||
let validation = Validation::new(
|
let validation = Validation::new(
|
||||||
workers,
|
workers,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
@ -674,7 +674,7 @@ mod tests {
|
|||||||
max_top_n_tokens,
|
max_top_n_tokens,
|
||||||
max_input_length,
|
max_input_length,
|
||||||
max_total_tokens,
|
max_total_tokens,
|
||||||
grammar_support,
|
disable_grammar_support,
|
||||||
);
|
);
|
||||||
match validation
|
match validation
|
||||||
.validate(GenerateRequest {
|
.validate(GenerateRequest {
|
||||||
|
Loading…
Reference in New Issue
Block a user