fix(server): do not warp prefill logits

2025-09-10 03:44:54 +00:00 · 2023-03-09 11:33:28 +01:00 · 2023-03-09 11:33:28 +01:00 · f49786ccba
commit f49786ccba
parent 941cd42e0c
1 changed file with 5 additions and 1 deletion
--- a/server/text_generation_server/utils/tokens.py
+++ b/server/text_generation_server/utils/tokens.py
@@ -70,7 +70,11 @@ class NextTokenChooser:

    def __call__(self, input_ids, scores):
        # Warp logits
-        scores = self.warpers(input_ids, scores)
+        if scores.shape[0] > 1:
+            # only warp the last token logits
+            scores[-1:, :] = self.warpers(input_ids, scores[-1:, :])
+        else:
+            scores = self.warpers(input_ids, scores)

        # Compute logprobs
        logprobs = torch.log_softmax(scores, -1)