From 09a1de5cd112a7ff940a2de7ebce7077af5b937c Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 29 Aug 2024 12:33:45 +0200 Subject: [PATCH] Fixing the state_weight when users decide to not use the speculation as much as defined in the config. --- server/text_generation_server/layers/mlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/layers/mlp.py b/server/text_generation_server/layers/mlp.py index 35f0bf56..3884808b 100644 --- a/server/text_generation_server/layers/mlp.py +++ b/server/text_generation_server/layers/mlp.py @@ -167,7 +167,7 @@ class MLPSpeculatorModel(torch.nn.Module): ) # Weights ensure that state_0 accounts for 50% of state magnitude by final head in expectation - self.state_weight = 0.5 ** (0.5 / self.n_predict) + self.state_weight = 0.5 ** (0.5 / self.n_predict) if self.n_predict > 0 else 1 self.emb_weight = math.sqrt(1 - self.state_weight**2) self.activation = nn.GELU() # TODO