Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-22 15:32:08 +00:00
fix: default num_ln_in_parallel_attn to one if not supplied (#2364)
parent 5400c7155d
commit db873be177
@@ -473,7 +473,9 @@ class FlashRWLayer(nn.Module):
 class FlashRWLayerNorm(nn.Module):
     def __init__(self, config, prefix: str, weights):
         super().__init__()
-        self.num_ln = config.num_ln_in_parallel_attn
+        # Falcon2 includes the number of layer norms in the config
+        # in the case no number of layer norms is provided, we default to 1
+        self.num_ln = getattr(config, "num_ln_in_parallel_attn", 1)

         if self.num_ln == 1:
             self.input_ln = FastLayerNorm.load(
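
The change swaps a direct attribute read for getattr with a default of 1: Falcon2 configs carry num_ln_in_parallel_attn, while configs that omit the field now fall back to a single layer norm instead of raising AttributeError. A minimal sketch of the pattern follows; SimpleNamespace is a hypothetical stand-in for the real config object, which is not reproduced here.

from types import SimpleNamespace

# Hypothetical configs: a Falcon2-style config that sets the field,
# and an older-style config that omits it.
falcon2_config = SimpleNamespace(num_ln_in_parallel_attn=2)
older_config = SimpleNamespace()

# Before the fix, a direct attribute access would fail on the older config:
# older_config.num_ln_in_parallel_attn  ->  AttributeError

# After the fix, a missing value falls back to a single layer norm:
print(getattr(falcon2_config, "num_ln_in_parallel_attn", 1))  # 2
print(getattr(older_config, "num_ln_in_parallel_attn", 1))    # 1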