# Patch summary (commentary only; this text precedes the first "diff --git" header).
#
# proto/generate.proto
#   - adds message LogitsProcessorParameters with two fields:
#       string name = 1               -- the name of the processor to apply
#       repeated string parameters = 2 -- the parameters to pass to the processor
#   - adds "repeated LogitsProcessorParameters logits_processors = 9" to
#     NextTokenChooserParameters, described there as optional.
# router/client/src/client.rs, router/src/health.rs, router/src/validation.rs
#   - each struct literal shown in these hunks gains "logits_processors: vec![]"
#     (an empty list); none of the visible hunks populates the field.
#
# NOTE(review): the two field comments inside the new message use "//" while the
# other comments visible in the proto hunks use "///", and the new message has no
# leading comment of its own -- consider aligning these in a follow-up change.
# NOTE(review): the hunks below sit on a single physical line; the original line
# breaks and indentation are presumably lost and would have to be restored before
# this can be applied as a patch -- TODO confirm against the upstream commit.
diff --git a/proto/generate.proto b/proto/generate.proto index c873e661..7dbbda4a 100644 --- a/proto/generate.proto +++ b/proto/generate.proto @@ -50,6 +50,13 @@ message ClearCacheRequest { /// Empty response message ClearCacheResponse {} +message LogitsProcessorParameters { + // The name of the processor to apply + string name = 1; + // The parameters to pass to the processor + repeated string parameters = 2; +} + message NextTokenChooserParameters { /// exponential scaling output probability distribution float temperature = 1; @@ -67,6 +74,8 @@ message NextTokenChooserParameters { float repetition_penalty = 7; /// token watermarking using "A Watermark for Large Language Models" bool watermark = 8; + /// Optional Logits Processors definitions + repeated LogitsProcessorParameters logits_processors = 9; } message StoppingCriteriaParameters { diff --git a/router/client/src/client.rs b/router/client/src/client.rs index 341e70fd..22f3f663 100644 --- a/router/client/src/client.rs +++ b/router/client/src/client.rs @@ -125,6 +125,7 @@ impl Client { seed: 0, repetition_penalty: 1.2, watermark: true, + logits_processors: vec![], }), stopping_parameters: Some(StoppingCriteriaParameters { max_new_tokens: max_total_tokens - truncate, diff --git a/router/src/health.rs b/router/src/health.rs index ab290fc1..3cf6861c 100644 --- a/router/src/health.rs +++ b/router/src/health.rs @@ -44,6 +44,7 @@ impl Health { seed: 0, repetition_penalty: 1.0, watermark: false, + logits_processors: vec![], }), stopping_parameters: Some(StoppingCriteriaParameters { max_new_tokens: 1, diff --git a/router/src/validation.rs b/router/src/validation.rs index 1b47fc97..a021e569 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -279,6 +279,7 @@ impl Validation { do_sample, seed, watermark, + logits_processors: vec![], }; let stopping_parameters = StoppingCriteriaParameters { max_new_tokens,