Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
fix: consolidate long rope paths
This commit is contained in:
parent b1026a84cb
commit dad070b1fc
@@ -89,43 +89,6 @@ class PositionRotaryEmbedding(nn.Module):
             if rope_type == "linear":
                 pass
-            elif rope_type == "longrope":
-                short_factor = torch.tensor(
-                    rope_scaling["short_factor"], dtype=torch.float32, device=device
-                )
-                long_factor = torch.tensor(
-                    rope_scaling["long_factor"], dtype=torch.float32, device=device
-                )
-                short_mscale = rope_scaling["short_mscale"]
-                long_mscale = rope_scaling["long_mscale"]
-                original_max_position_embeddings = (
-                    config.original_max_position_embeddings
-                )
-                return Phi3LongRoPEScaledRotaryEmbedding(
-                    short_inv_freq=1.0
-                    / (
-                        short_factor
-                        * base
-                        ** (
-                            torch.arange(0, dim, 2, device=device, dtype=torch.float32)
-                            / dim
-                        )
-                    ),
-                    long_inv_freq=1.0
-                    / (
-                        long_factor
-                        * base
-                        ** (
-                            torch.arange(0, dim, 2, device=device, dtype=torch.float32)
-                            / dim
-                        )
-                    ),
-                    max_position_embeddings=config.max_position_embeddings,
-                    short_mscale=short_mscale,
-                    long_mscale=long_mscale,
-                    original_max_position_embeddings=original_max_position_embeddings,
-                )
-
             elif rope_type == "dynamic":
                 scaling_factor = rope_scaling["factor"]
                 return DynamicPositionRotaryEmbedding(
@@ -203,6 +166,20 @@ class PositionRotaryEmbedding(nn.Module):
                         1 + math.log(scale) / math.log(original_max_position_embeddings)
                     )
 
+                # if short_mscale and long_mscale are provided we need to scale the freqs
+                # using the Phi3LongRoPEScaledRotaryEmbedding
+                if ("short_mscale" in rope_scaling) and ("long_mscale" in rope_scaling):
+                    short_mscale = rope_scaling["short_mscale"]
+                    long_mscale = rope_scaling["long_mscale"]
+                    return Phi3LongRoPEScaledRotaryEmbedding(
+                        short_inv_freq=short_inv_freq,
+                        long_inv_freq=long_inv_freq,
+                        max_position_embeddings=config.max_position_embeddings,
+                        short_mscale=short_mscale,
+                        long_mscale=long_mscale,
+                        original_max_position_embeddings=original_max_position_embeddings,
+                    )
+
                 return SuRotaryEmbedding(
                     short_inv_freq=short_inv_freq,
                     long_inv_freq=long_inv_freq,
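Below is a minimal, self-contained Python sketch of the dispatch this diff consolidates into the long-rope branch. It is not the repository code: the helper names are hypothetical and the branches return plain tuples rather than real embedding modules; only the inverse-frequency formula and the short_mscale/long_mscale check are taken from the diff above.

# Illustrative sketch only -- not the code in this repository. It mirrors the
# logic in the diff: compute the scaled inverse frequencies once, then take the
# Phi3 long-rope path only when explicit mscale values are present.
import torch


def _scaled_inv_freq(factor, base, dim, device=None):
    # 1.0 / (factor * base ** (arange(0, dim, 2) / dim)), as in both hunks above.
    factor = torch.tensor(factor, dtype=torch.float32, device=device)
    exponent = torch.arange(0, dim, 2, device=device, dtype=torch.float32) / dim
    return 1.0 / (factor * base**exponent)


def pick_long_rope_path(rope_scaling, base, dim, device=None):
    # Hypothetical helper: reports which embedding class the consolidated path
    # would construct, together with the frequencies it would receive.
    short_inv_freq = _scaled_inv_freq(rope_scaling["short_factor"], base, dim, device)
    long_inv_freq = _scaled_inv_freq(rope_scaling["long_factor"], base, dim, device)

    if ("short_mscale" in rope_scaling) and ("long_mscale" in rope_scaling):
        # Explicit mscales -> Phi3LongRoPEScaledRotaryEmbedding branch.
        return "Phi3LongRoPEScaledRotaryEmbedding", short_inv_freq, long_inv_freq
    # Otherwise the generic SuRotaryEmbedding branch is taken.
    return "SuRotaryEmbedding", short_inv_freq, long_inv_freq


if __name__ == "__main__":
    path, _, _ = pick_long_rope_path(
        {
            "short_factor": [1.0] * 32,
            "long_factor": [4.0] * 32,
            "short_mscale": 1.1,
            "long_mscale": 1.2,
        },
        base=10000.0,
        dim=64,
    )
    print(path)  # -> Phi3LongRoPEScaledRotaryEmbedding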