Fix the state_weight computation when users choose to use less speculation
than is defined in the config (for example, when n_predict is 0).
2025-09-12 04:44:52 +00:00 · 2024-08-29 12:33:45 +02:00 · 2024-08-29 12:33:45 +02:00 · 09a1de5cd1
commit 09a1de5cd1
parent 62a8343153
1 changed file with 1 addition and 1 deletion
--- a/server/text_generation_server/layers/mlp.py
+++ b/server/text_generation_server/layers/mlp.py
@@ -167,7 +167,7 @@ class MLPSpeculatorModel(torch.nn.Module):
        )
        # Weights ensure that state_0 accounts for 50% of state magnitude by final head in expectation
-        self.state_weight = 0.5 ** (0.5 / self.n_predict)
+        self.state_weight = 0.5 ** (0.5 / self.n_predict) if self.n_predict > 0 else 1
        self.emb_weight = math.sqrt(1 - self.state_weight**2)
        self.activation = nn.GELU()
        # TODO