black

2025-09-10 11:54:52 +00:00 · 2023-05-30 17:09:51 +02:00 · 2023-05-30 17:09:51 +02:00 · c7b899a438
commit c7b899a438
parent a2f437a291
1 changed files with 10 additions and 2 deletions
--- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
@ -295,7 +295,11 @@ class FlashRWLargeAttention(torch.nn.Module):
            # Copy to layer past
            layer_past[...] = kv
            # Expand to query shape
-            kv = kv.unsqueeze(2).expand(-1, self.num_groups, self.num_heads, 2, self.head_size).reshape(-1, self.num_groups * self.num_heads, 2, self.head_size)
+            kv = (
                kv.unsqueeze(2)
                .expand(-1, self.num_groups, self.num_heads, 2, self.head_size)
                .reshape(-1, self.num_groups * self.num_heads, 2, self.head_size)
            )
            # output
            attn_output = torch.empty_like(query)
@ -322,7 +326,11 @@ class FlashRWLargeAttention(torch.nn.Module):
            # Add present to the layer_past tensor at the correct indices
            layer_past[layer_past_present_indices] = kv
            # Expand to query shape
-            kv = layer_past.unsqueeze(2).expand(-1, self.num_groups, self.num_heads, 2, self.head_size).reshape(-1, self.num_groups * self.num_heads, 2, self.head_size)
+            kv = (
                layer_past.unsqueeze(2)
                .expand(-1, self.num_groups, self.num_heads, 2, self.head_size)
                .reshape(-1, self.num_groups * self.num_heads, 2, self.head_size)
            )
            # output
            attn_output = torch.empty_like(query)