diff --git a/benchmark/src/lib.rs b/benchmark/src/lib.rs index e290665c..a0ef0fe6 100644 --- a/benchmark/src/lib.rs +++ b/benchmark/src/lib.rs @@ -45,7 +45,7 @@ pub async fn run( repetition_penalty: repetition_penalty.unwrap_or(1.0), frequency_penalty: frequency_penalty.unwrap_or(0.0), watermark, - fsm_grammar_state: Vec::new(), + fsm_grammar_state: 0, }; // Initialize terminal properties diff --git a/proto/generate.proto b/proto/generate.proto index 015dd6e9..82081921 100644 --- a/proto/generate.proto +++ b/proto/generate.proto @@ -71,9 +71,9 @@ message NextTokenChooserParameters { /// token watermarking using "A Watermark for Large Language Models" bool watermark = 8; /// grammar (applied if not empty) - repeated string grammar = 10; + string grammar = 10; /// fsm_grammar_state - repeated uint32 fsm_grammar_state = 11; + uint32 fsm_grammar_state = 11; } message StoppingCriteriaParameters { diff --git a/router/client/src/client.rs b/router/client/src/client.rs index f26b6a69..38e6e0e3 100644 --- a/router/client/src/client.rs +++ b/router/client/src/client.rs @@ -128,8 +128,8 @@ impl Client { repetition_penalty: 1.2, frequency_penalty: 0.1, watermark: true, - grammar: Vec::new(), - fsm_grammar_state: Vec::new(), + grammar: String::new(), + fsm_grammar_state: 0, }), stopping_parameters: Some(StoppingCriteriaParameters { max_new_tokens: max_total_tokens - truncate, diff --git a/router/src/health.rs b/router/src/health.rs index 0605dca8..f3cac17e 100644 --- a/router/src/health.rs +++ b/router/src/health.rs @@ -45,8 +45,8 @@ impl Health { repetition_penalty: 1.0, frequency_penalty: 0.0, watermark: false, - grammar: Vec::new(), - fsm_grammar_state: Vec::new(), + grammar: String::new(), + fsm_grammar_state: 0, }), stopping_parameters: Some(StoppingCriteriaParameters { max_new_tokens: 1, diff --git a/router/src/queue.rs b/router/src/queue.rs index 21467aa7..0162b906 100644 --- a/router/src/queue.rs +++ b/router/src/queue.rs @@ -368,8 +368,8 @@ mod tests { repetition_penalty: 0.0, frequency_penalty: 0.0, watermark: false, - grammar: Vec::new(), - fsm_grammar_state: Vec::new(), + grammar: String::new(), + fsm_grammar_state: 0, }, stopping_parameters: StoppingCriteriaParameters { ignore_eos_token: false, diff --git a/router/src/validation.rs b/router/src/validation.rs index 83a68435..2959459d 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -294,9 +294,9 @@ impl Validation { .await?; // initialize the grammar parameter - let grammar = vec![grammar]; + let grammar = grammar; // init the start state of the grammar - let fsm_grammar_state = vec![0]; + let fsm_grammar_state = 0; let parameters = NextTokenChooserParameters { temperature,