Remove unneeded Model.num_heads field

Nick Hill 2022-12-04 11:31:08 -08:00
parent a172430d8b
commit 1747365e25
5 changed files with 2 additions and 7 deletions

@@ -82,7 +82,6 @@ class BLOOMSharded(CausalLM):
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
             tokenizer=tokenizer,
-            num_heads=config.n_head // self.process_group.size(),
             device=device,
         )

@@ -251,7 +251,6 @@ class CausalLM(Model):
         super(CausalLM, self).__init__(
             tokenizer=tokenizer,
-            num_heads=self.model.config.num_attention_heads,
             device=device,
         )
@@ -358,7 +357,7 @@ class CausalLM(Model):
             # Force past to be of dim [batch_size, num_heads, ...] for easy indexing
             next_batch_past_key_values = [
                 [
-                    t.view(-1, self.num_heads, *t.shape[-2:])[next_batch_keep_indices]
+                    t.view(batch.size, -1, *t.shape[-2:])[next_batch_keep_indices]
                     for t in layer
                 ]
                 for layer in past
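
For readers wondering why the `num_heads` factor can be dropped from the reshape above, the sketch below (not part of the commit; tensor sizes and variable names are made up for illustration) shows that grouping the flattened past key/value tensor by batch size with an inferred head dimension yields the same `[batch_size, num_heads, ...]` layout as the old call, so indexing along dim 0 still selects whole batch entries:

```python
import torch

# Hypothetical sizes, for illustration only.
batch_size, num_heads, seq_len, head_dim = 3, 4, 7, 16

# Past key/value tensors are stored with batch and heads flattened together.
t = torch.randn(batch_size * num_heads, seq_len, head_dim)

# Old reshape: group by num_heads and let PyTorch infer the batch dimension.
old = t.view(-1, num_heads, *t.shape[-2:])

# New reshape: group by the known batch size and infer the head dimension.
new = t.view(batch_size, -1, *t.shape[-2:])

# Both yield [batch_size, num_heads, seq_len, head_dim], so indexing dim 0
# with next_batch_keep_indices keeps whole batch entries either way.
assert old.shape == new.shape == (batch_size, num_heads, seq_len, head_dim)
assert torch.equal(old, new)
```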

@@ -185,7 +185,6 @@ class GalacticaSharded(Galactica):
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
             tokenizer=tokenizer,
-            num_heads=config.num_attention_heads // self.process_group.size(),
             device=device,
         )

@ -10,9 +10,8 @@ B = TypeVar("B", bound=Batch)
class Model(ABC): class Model(ABC):
def __init__(self, tokenizer: Tokenizer, num_heads: int, device: torch.device): def __init__(self, tokenizer: Tokenizer, device: torch.device):
self.tokenizer = tokenizer self.tokenizer = tokenizer
self.num_heads = num_heads
self.device = device self.device = device
@property @property
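
As a rough sketch of how the simplified base class is used after this change (the real `Tokenizer` alias is assumed to be a Hugging Face tokenizer type, and the subclass name is made up), subclasses now pass only the tokenizer and device to `Model.__init__` and keep any head-count arithmetic local:

```python
import torch
from abc import ABC
from transformers import PreTrainedTokenizerBase


class Model(ABC):
    # Simplified constructor after this commit: no num_heads field.
    def __init__(self, tokenizer: PreTrainedTokenizerBase, device: torch.device):
        self.tokenizer = tokenizer
        self.device = device


class ExampleCausalLM(Model):
    def __init__(self, tokenizer: PreTrainedTokenizerBase, device: torch.device):
        # Only the tokenizer and device are forwarded to the base class;
        # anything head-related stays inside the subclass that needs it.
        super().__init__(tokenizer=tokenizer, device=device)
```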

@@ -319,7 +319,6 @@ class Seq2SeqLM(Model):
         super(Seq2SeqLM, self).__init__(
             tokenizer=tokenizer,
-            num_heads=self.model.config.num_attention_heads,
             device=device,
         )