From 751f1bb8154fd4fe3a36a4b128c5f830dd0effa2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Wed, 16 Oct 2024 13:54:57 +0000
Subject: [PATCH] Make check more obvious

---
 server/text_generation_server/layers/attention/kv_cache.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/server/text_generation_server/layers/attention/kv_cache.py b/server/text_generation_server/layers/attention/kv_cache.py
index f1f9ecce..7f1dd370 100644
--- a/server/text_generation_server/layers/attention/kv_cache.py
+++ b/server/text_generation_server/layers/attention/kv_cache.py
@@ -24,10 +24,8 @@ class KVCache:
     ):
         """Construct the key-value cache for a layer."""
 
-        if (
-            dtype.itemsize == 1
-            and dtype.is_floating_point
-            and (ATTENTION != "flashinfer" or SYSTEM != "cuda")
+        if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
+            ATTENTION != "flashinfer" or SYSTEM != "cuda"
         ):
             raise ValueError(
                 "FP8 KV cache is currently only supported for flashinfer on CUDA"
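
A minimal sketch of what the patch changes, assuming PyTorch >= 2.1 (which
defines torch.float8_e5m2, torch.float8_e4m3fn, and the dtype.itemsize
property); the old_check/new_check helper names are illustrative, not part of
the patch. For the dtypes the server deals with, the explicit set test agrees
with the previous itemsize/is_floating_point test:

    import torch

    def old_check(dtype: torch.dtype) -> bool:
        # Pre-patch test: any 1-byte floating-point dtype was treated as FP8.
        return dtype.itemsize == 1 and dtype.is_floating_point

    def new_check(dtype: torch.dtype) -> bool:
        # Post-patch test: names the two FP8 dtypes explicitly.
        return dtype in {torch.float8_e5m2, torch.float8_e4m3fn}

    # Both tests agree on these dtypes. Note that the explicit set is also
    # stricter: 1-byte float variants such as torch.float8_e4m3fnuz would
    # pass old_check but not new_check.
    for dtype in (torch.float8_e5m2, torch.float8_e4m3fn, torch.float16, torch.int8):
        assert old_check(dtype) == new_check(dtype)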