Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 04:14:52 +00:00)
WIP: Adding logits processors to protocol
Commit 4ca2c5c945 (parent b226e469c9)
@@ -50,6 +50,13 @@ message ClearCacheRequest {
 /// Empty response
 message ClearCacheResponse {}
 
+message LogitsProcessorParameters {
+    // The name of the processor to apply
+    string name = 1;
+    // The parameters to pass to the processor
+    repeated string parameters = 2;
+}
+
 message NextTokenChooserParameters {
     /// exponential scaling output probability distribution
     float temperature = 1;
@@ -67,6 +74,8 @@ message NextTokenChooserParameters {
     float repetition_penalty = 7;
     /// token watermarking using "A Watermark for Large Language Models"
     bool watermark = 8;
+    /// Optional Logits Processors definitions
+    repeated LogitsProcessorParameters logits_processors = 9;
 }
 
 message StoppingCriteriaParameters {
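The protocol change above adds a generic processor descriptor (a name plus string-encoded parameters) and an optional list of them on NextTokenChooserParameters. As a minimal sketch of what that shape looks like on the Rust side, the snippet below mirrors the two messages with hand-written structs and builds one request's parameters. The structs are stand-ins for the generated protobuf types, and the "min_length" processor with its "16" argument is an illustrative value, not something defined by this commit.

// Hand-written stand-ins that mirror the proto messages above.
// In the repository these types are generated from the protocol file.
#[derive(Clone, Debug)]
struct LogitsProcessorParameters {
    /// The name of the processor to apply
    name: String,
    /// The parameters to pass to the processor, encoded as strings
    parameters: Vec<String>,
}

#[derive(Clone, Debug)]
struct NextTokenChooserParameters {
    /// exponential scaling output probability distribution
    temperature: f32,
    repetition_penalty: f32,
    watermark: bool,
    /// New in this commit: optional list of extra logits processors
    logits_processors: Vec<LogitsProcessorParameters>,
    // remaining sampling fields (top_k, top_p, typical_p, do_sample, seed) omitted
}

fn main() {
    // Build parameters that ask for one extra processor by name.
    // "min_length" and "16" are illustrative values only.
    let params = NextTokenChooserParameters {
        temperature: 0.7,
        repetition_penalty: 1.2,
        watermark: false,
        logits_processors: vec![LogitsProcessorParameters {
            name: "min_length".to_string(),
            parameters: vec!["16".to_string()],
        }],
    };
    println!("{params:?}");
}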
@@ -125,6 +125,7 @@ impl Client {
             seed: 0,
             repetition_penalty: 1.2,
             watermark: true,
+            logits_processors: vec![],
         }),
         stopping_parameters: Some(StoppingCriteriaParameters {
             max_new_tokens: max_total_tokens - truncate,
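On the router side this WIP commit only threads the new field through with an empty list; nothing consumes it yet. Purely to illustrate the name-plus-string-parameters contract, the sketch below shows how a receiver could turn one LogitsProcessorParameters entry into a closure that edits a logits slice. The processor names ("temperature", "min_length"), the f32 slice representation, and the assumption that EOS is token id 0 are illustrative choices, not code from this repository.

/// Stand-in for the decoded protobuf message (see the proto hunk above).
struct LogitsProcessorParameters {
    name: String,
    parameters: Vec<String>,
}

/// A logits processor as a boxed closure over (generated_len, logits).
type LogitsProcessor = Box<dyn Fn(usize, &mut [f32])>;

/// Turn a protocol entry into a concrete processor, rejecting unknown names
/// or malformed string parameters. The two names handled here are examples.
fn build_processor(p: &LogitsProcessorParameters) -> Result<LogitsProcessor, String> {
    match p.name.as_str() {
        // Example: rescale every logit by 1 / temperature.
        "temperature" => {
            let t: f32 = p
                .parameters
                .first()
                .ok_or("temperature expects one parameter")?
                .parse()
                .map_err(|e| format!("invalid temperature: {e}"))?;
            Ok(Box::new(move |_generated_len: usize, logits: &mut [f32]| {
                for l in logits.iter_mut() {
                    *l /= t;
                }
            }))
        }
        // Example: forbid EOS until `min` tokens were generated, assuming
        // (arbitrarily here) that EOS is token id 0.
        "min_length" => {
            let min: usize = p
                .parameters
                .first()
                .ok_or("min_length expects one parameter")?
                .parse()
                .map_err(|e| format!("invalid min_length: {e}"))?;
            Ok(Box::new(move |generated_len: usize, logits: &mut [f32]| {
                if generated_len < min {
                    logits[0] = f32::NEG_INFINITY;
                }
            }))
        }
        other => Err(format!("unknown logits processor: {other}")),
    }
}

fn main() -> Result<(), String> {
    let entry = LogitsProcessorParameters {
        name: "temperature".to_string(),
        parameters: vec!["0.7".to_string()],
    };
    let processor = build_processor(&entry)?;
    let mut logits = vec![1.0_f32, 2.0, 3.0];
    processor(0, &mut logits);
    println!("{logits:?}");
    Ok(())
}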
@@ -44,6 +44,7 @@ impl Health {
             seed: 0,
             repetition_penalty: 1.0,
             watermark: false,
+            logits_processors: vec![],
         }),
         stopping_parameters: Some(StoppingCriteriaParameters {
             max_new_tokens: 1,
@@ -279,6 +279,7 @@ impl Validation {
            do_sample,
            seed,
            watermark,
+           logits_processors: vec![],
        };
        let stopping_parameters = StoppingCriteriaParameters {
            max_new_tokens,
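Validation likewise forwards an empty list for now. If user requests eventually carry processor definitions, the router would want to reject unknown names or malformed parameters before they reach the shards; the sketch below is one possible shape for such a check. The allow-list, the arities, and the error strings are assumptions made up for illustration, not part of this commit.

use std::collections::HashMap;

/// Stand-in for the protobuf message added in this commit.
struct LogitsProcessorParameters {
    name: String,
    parameters: Vec<String>,
}

/// Illustrative allow-list: processor name -> expected number of string parameters.
/// The names and arities are assumptions, not taken from this repository.
fn known_processors() -> HashMap<&'static str, usize> {
    HashMap::from([("temperature", 1), ("min_length", 1), ("no_repeat_ngram", 1)])
}

/// Reject unknown processor names or wrong parameter counts before the
/// definitions are forwarded to the model shards.
fn validate_processors(processors: &[LogitsProcessorParameters]) -> Result<(), String> {
    let known = known_processors();
    for p in processors {
        match known.get(p.name.as_str()) {
            None => return Err(format!("unknown logits processor `{}`", p.name)),
            Some(&arity) if p.parameters.len() != arity => {
                return Err(format!(
                    "logits processor `{}` expects {} parameter(s), got {}",
                    p.name,
                    arity,
                    p.parameters.len()
                ))
            }
            Some(_) => {}
        }
    }
    Ok(())
}

fn main() {
    let ok = vec![LogitsProcessorParameters {
        name: "min_length".to_string(),
        parameters: vec!["16".to_string()],
    }];
    assert!(validate_processors(&ok).is_ok());

    let bad = vec![LogitsProcessorParameters {
        name: "does_not_exist".to_string(),
        parameters: vec![],
    }];
    assert!(validate_processors(&bad).is_err());
    println!("validation sketch ran");
}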