mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Black
This commit is contained in:
parent
a75c9a21e8
commit
53f9b18086
@@ -24,10 +24,8 @@ class KVCache:
|
||||
):
|
||||
"""Construct the key-value cache for a layer."""
|
||||
|
||||
if (
|
||||
dtype == torch.float8_e5m2
|
||||
and (ATTENTION != "flashinfer"
|
||||
or SYSTEM != "cuda")
|
||||
if dtype == torch.float8_e5m2 and (
|
||||
ATTENTION != "flashinfer" or SYSTEM != "cuda"
|
||||
):
|
||||
raise ValueError(
|
||||
"float8_e5m2 KV cache is currently only supported for flashinfer on CUDA"
|
||||
|
Loading…
Reference in New Issue
Block a user