Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Update kv_cache.py
commit e03a7167ee
parent 2358c2bb54
@@ -26,8 +26,8 @@ class KVCache:
         if (
             dtype == torch.float8_e5m2
-            and ATTENTION != "flashinfer"
-            and SYSTEM != "cuda"
+            and (ATTENTION != "flashinfer"
+            or SYSTEM != "cuda")
         ):
             raise ValueError(
                 "float8_e5m2 KV cache is currently only supported for flashinfer on CUDA"
             )
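
The old guard chained the two comparisons with "and", so the ValueError was raised only when the attention backend was not flashinfer and the system was not CUDA at the same time; unsupported combinations such as flashinfer on a non-CUDA system, or a non-flashinfer backend on CUDA, slipped through. The fixed guard groups the comparisons with "or", so either mismatch rejects the float8_e5m2 KV cache. Below is a minimal standalone sketch of the two boolean conditions; old_guard, new_guard, and the sample backend/system strings other than "flashinfer" and "cuda" are hypothetical illustrations, not names taken from the TGI codebase.

# Standalone sketch, not from kv_cache.py: old_guard/new_guard are hypothetical
# helpers that mirror the boolean expressions removed and added by this commit.

def old_guard(attention: str, system: str) -> bool:
    # Pre-fix condition: fires only when BOTH the attention backend
    # and the system are unsupported.
    return attention != "flashinfer" and system != "cuda"


def new_guard(attention: str, system: str) -> bool:
    # Post-fix condition: fires when EITHER requirement is unmet.
    return attention != "flashinfer" or system != "cuda"


# Illustrative backend/system combinations (values other than
# "flashinfer"/"cuda" are placeholders, not actual TGI settings).
for attention, system in [
    ("flashinfer", "cuda"),   # supported combination: neither guard fires
    ("flashinfer", "other"),  # wrong system: only the fixed guard fires
    ("other", "cuda"),        # wrong attention backend: only the fixed guard fires
    ("other", "other"),       # both wrong: both guards fire
]:
    print(
        f"{attention}/{system}: "
        f"old={old_guard(attention, system)} new={new_guard(attention, system)}"
    )

Running the sketch shows that only the grouped "or" form raises for the two partially unsupported combinations, which is the behavior the error message promises.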