Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Make check more obvious

Commit 751f1bb815, parent aa92e451a0
@@ -24,10 +24,8 @@ class KVCache:
     ):
         """Construct the key-value cache for a layer."""

-        if (
-            dtype.itemsize == 1
-            and dtype.is_floating_point
-            and (ATTENTION != "flashinfer" or SYSTEM != "cuda")
-        ):
+        if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
+            ATTENTION != "flashinfer" or SYSTEM != "cuda"
+        ):
             raise ValueError(
                 "FP8 KV cache is currently only supported for flashinfer on CUDA"
             )
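For context, below is a minimal, self-contained sketch of the reworked guard. It assumes ATTENTION and SYSTEM are module-level strings (as they are elsewhere in text-generation-inference); the constructor signature and the placeholder values are simplified and hypothetical, not the library's actual API.

# Sketch of the new FP8 KV cache check, under the assumptions stated above.
import torch

ATTENTION = "flashinfer"  # placeholder; in TGI this reflects the selected attention backend
SYSTEM = "cuda"           # placeholder; in TGI this reflects the detected hardware platform


class KVCache:
    def __init__(self, dtype: torch.dtype):
        """Construct the key-value cache for a layer."""
        # The old check inferred FP8 from `dtype.itemsize == 1 and
        # dtype.is_floating_point`; the new one names the FP8 dtypes explicitly.
        if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
            ATTENTION != "flashinfer" or SYSTEM != "cuda"
        ):
            raise ValueError(
                "FP8 KV cache is currently only supported for flashinfer on CUDA"
            )


# Example: constructing a cache with an FP8 dtype only succeeds when the
# flashinfer-on-CUDA combination above is active; otherwise it raises.
KVCache(dtype=torch.float8_e5m2)

The explicit dtype set reads more directly than the itemsize/floating-point heuristic, which is presumably the point of the commit message.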