missing get_weights implementation

2025-09-12 04:44:52 +00:00 · 2024-07-20 09:56:46 +02:00 · 2024-07-20 09:56:46 +02:00 · c9e8b68426
commit c9e8b68426
parent b9410c3edf
1 changed files with 17 additions and 0 deletions
--- a/server/text_generation_server/layers/fp8.py
+++ b/server/text_generation_server/layers/fp8.py
@ -71,6 +71,23 @@ class HybridFP8UnquantLoader(WeightsLoader):
        self.activation_scale_ub = activation_scale_ub
        self.to_fp8 = to_fp8

+    def get_weights(self, weights: "Weights", prefix: str):
+        w = weights.get_tensor(f"{prefix}.weight")
+
+        if w.dtype == torch.float8_e4m3fn:
+            # FP8 branch
+            scale = weights.get_tensor(f"{prefix}.weight_scale", to_dtype=False)
+            return Fp8Weight(
+                weight=w,
+                weight_scale=scale,
+                activation_scale_ub=self.activation_scale_ub,
+                dtype=weights.dtype,
+            )
+        if self.to_fp8:
+            return Fp8Weight(weight=w, dtype=weights.dtype)
+
+        return UnquantizedWeight(w)
+
    def get_weights_col_packed(
        self,
        weights: Weights,