optimize argmax

2025-09-10 03:44:54 +00:00 · 2023-05-24 16:28:16 +02:00 · 2023-05-24 16:28:16 +02:00 · a62f14872e
commit a62f14872e
parent c59fb353a0
1 changed file with 2 additions and 1 deletion
--- a/server/text_generation_server/utils/tokens.py
+++ b/server/text_generation_server/utils/tokens.py
@@ -296,7 +296,8 @@ class HeterogeneousSampling:
    def __call__(self, logits):
        out = torch.empty(logits.shape[0], dtype=torch.int64, device=logits.device)
        if self.greedy_indices:
-            out[self.greedy_indices] = torch.argmax(logits[self.greedy_indices], -1)
+            # Computing for all indices is faster than slicing
+            torch.argmax(logits, -1, out=out)
        for i, sampling in self.sampling_mapping.items():
            out[i] = sampling(logits[i])