From e03a7167ee49b51248d9991f7dcb10fab421da2b Mon Sep 17 00:00:00 2001
From: Florian Zimmermeister
Date: Sat, 5 Oct 2024 09:12:12 +0200
Subject: [PATCH] Update kv_cache.py

---
 server/text_generation_server/layers/attention/kv_cache.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/layers/attention/kv_cache.py b/server/text_generation_server/layers/attention/kv_cache.py
index 80033122..ced4b5b4 100644
--- a/server/text_generation_server/layers/attention/kv_cache.py
+++ b/server/text_generation_server/layers/attention/kv_cache.py
@@ -26,8 +26,8 @@ class KVCache:
         if (
             dtype == torch.float8_e5m2
-            and ATTENTION != "flashinfer"
-            and SYSTEM != "cuda"
+            and (ATTENTION != "flashinfer"
+            or SYSTEM != "cuda")
         ):
             raise ValueError(
                 "float8_e5m2 KV cache is currently only supported for flashinfer on CUDA"
             )
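
For context on the logic fix: float8_e5m2 is supported only for the
combination of flashinfer AND CUDA, so by De Morgan's law the guard must
raise when ATTENTION is not flashinfer OR SYSTEM is not CUDA. The old
and-chain only raised when both parts were wrong, so an unsupported
combination such as a non-flashinfer backend on CUDA slipped through.
A minimal standalone sketch of the corrected check (the validate_kv_dtype
helper and its plain-string parameters are hypothetical stand-ins for the
module's ATTENTION/SYSTEM globals):

    import torch

    def validate_kv_dtype(dtype: torch.dtype, attention: str, system: str) -> None:
        # Supported only when attention == "flashinfer" AND system == "cuda";
        # negating that pair yields the OR below (De Morgan's law).
        if dtype == torch.float8_e5m2 and (
            attention != "flashinfer" or system != "cuda"
        ):
            raise ValueError(
                "float8_e5m2 KV cache is currently only supported for flashinfer on CUDA"
            )

    validate_kv_dtype(torch.float8_e5m2, "flashinfer", "cuda")  # supported: no error
    # validate_kv_dtype(torch.float8_e5m2, "paged", "cuda")
    # ^ the old and-chain accepted this (SYSTEM was "cuda"); the fixed guard raises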