Fix logic.

2025-09-10 11:54:52 +00:00 · 2023-05-25 09:42:59 +00:00 · 2023-05-25 09:42:59 +00:00 · 7fa79f02ca
commit 7fa79f02ca
parent 4e071bf2f1
1 changed files with 0 additions and 7 deletions
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -95,13 +95,6 @@ class FlashNeoxAttention(torch.nn.Module):
        rotary_ndims = int(self.head_size * rotary_pct)
        self.rotary_emb = PositionRotaryEmbedding.load(prefix=f"{prefix}.rotary_emb", weights=weights)
        dtype = weights.dtype
        weights.dtype = torch.float32
        self.rotary_emb.inv_freq = nn.Parameter(
            weights.get_tensor(f"{prefix}.rotary_emb.inv_freq")
        )
        weights.dtype = dtype
        self.softmax_scale = self.head_size ** (-0.5)
        self.query_key_value = load_qkv(