Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Make check more obvious

Commit 751f1bb815, parent aa92e451a0
@@ -24,10 +24,8 @@ class KVCache:
     ):
         """Construct the key-value cache for a layer."""

-        if (
-            dtype.itemsize == 1
-            and dtype.is_floating_point
-            and (ATTENTION != "flashinfer" or SYSTEM != "cuda")
-        ):
+        if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
+            ATTENTION != "flashinfer" or SYSTEM != "cuda"
+        ):
             raise ValueError(
                 "FP8 KV cache is currently only supported for flashinfer on CUDA"
             )
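For context, below is a minimal, self-contained sketch of the reworked guard. It assumes ATTENTION and SYSTEM are module-level strings (as they are elsewhere in text-generation-inference); the constructor signature and the placeholder values are simplified and hypothetical, not the library's actual API.

# Sketch of the new FP8 KV cache check, under the assumptions stated above.
import torch

ATTENTION = "flashinfer"  # placeholder; in TGI this reflects the selected attention backend
SYSTEM = "cuda"           # placeholder; in TGI this reflects the detected hardware platform


class KVCache:
    def __init__(self, dtype: torch.dtype):
        """Construct the key-value cache for a layer."""
        # The old check inferred FP8 from `dtype.itemsize == 1 and
        # dtype.is_floating_point`; the new one names the FP8 dtypes explicitly.
        if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
            ATTENTION != "flashinfer" or SYSTEM != "cuda"
        ):
            raise ValueError(
                "FP8 KV cache is currently only supported for flashinfer on CUDA"
            )


# Example: constructing a cache with an FP8 dtype only succeeds when the
# flashinfer-on-CUDA combination above is active; otherwise it raises.
KVCache(dtype=torch.float8_e5m2)

The explicit dtype set reads more directly than the itemsize/floating-point heuristic, which is presumably the point of the commit message.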