Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-12 04:44:52 +00:00)
fix: improve fbgemm_gpu check and lints
commit e216e53ea8 (parent 382bf59f4f)
@@ -20,7 +20,10 @@ FBGEMM_DYN_AVAILABLE = False


 def is_fbgemm_gpu_available():
-    return importlib.util.find_spec("fbgemm_gpu.experimental.gen_ai") is not None
+    try:
+        return importlib.util.find_spec("fbgemm_gpu.experimental.gen_ai") is not None
+    except ModuleNotFoundError:
+        return False


 if is_fbgemm_gpu_available():
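For context (not part of the diff): importlib.util.find_spec imports the parent package of a dotted module name, so when fbgemm_gpu itself is not installed the lookup raises ModuleNotFoundError instead of returning None; the added try/except turns that failure into a plain False. A minimal, self-contained sketch of the same check, using the hypothetical name is_module_available:

import importlib.util


def is_module_available(name: str) -> bool:
    # find_spec("pkg.sub") imports the parent package "pkg" first; if that
    # parent is missing entirely, it raises ModuleNotFoundError rather than
    # returning None, hence the defensive except clause.
    try:
        return importlib.util.find_spec(name) is not None
    except ModuleNotFoundError:
        return False


# Prints False (instead of crashing) on machines without fbgemm_gpu installed.
print(is_module_available("fbgemm_gpu.experimental.gen_ai"))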
@@ -1,6 +1,3 @@
-from typing import List, Tuple
-
-import torch
 from text_generation_server.layers.marlin.fp8 import GPTQMarlinFP8Linear
 from text_generation_server.layers.marlin.gptq import (
     GPTQMarlinLinear,
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Union

 import torch
 import torch.nn as nn
@@ -85,7 +85,7 @@ class MarlinWeightsLoader(WeightsLoader):
             )
         except RuntimeError:
             raise RuntimeError(
-                f"Cannot load `marlin` weight, make sure the model is already quantized"
+                "Cannot load `marlin` weight, make sure the model is already quantized"
             )

         B_meta = torch.cat(
@@ -104,7 +104,7 @@ class MarlinWeightsLoader(WeightsLoader):
             )
         except RuntimeError:
             raise RuntimeError(
-                f"Cannot load `marlin` weight, make sure the model is already quantized"
+                "Cannot load `marlin` weight, make sure the model is already quantized"
             )
         s = torch.cat(
             [weights.get_sharded(f"{p}.s", dim=1) for p in prefixes], dim=1
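The remaining hunks are the lint fixes mentioned in the commit title: the two import hunks appear to drop imports that are no longer used, and the two MarlinWeightsLoader hunks strip the f prefix from string literals that contain no placeholders, the kind of warning linters raise for an f-string without any interpolation. A minimal sketch of the corrected error-handling pattern, with a hypothetical load_fn standing in for the actual tensor-loading call:

def load_marlin_or_raise(load_fn):
    # A plain string literal is used here: nothing is interpolated, so the `f`
    # prefix removed by this commit served no purpose.
    try:
        return load_fn()
    except RuntimeError:
        raise RuntimeError(
            "Cannot load `marlin` weight, make sure the model is already quantized"
        )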