fix import

2025-09-10 11:54:52 +00:00 · 2024-09-27 12:32:17 +00:00 · 2024-09-27 12:32:17 +00:00 · 816d4b67b2
commit 816d4b67b2
parent 47c81d2924
1 changed file with 4 additions and 0 deletions
--- a/server/text_generation_server/layers/attention/__init__.py
+++ b/server/text_generation_server/layers/attention/__init__.py
@@ -11,6 +11,7 @@ if SYSTEM == "cuda":
        paged_attention,
        reshape_and_cache,
        SUPPORTS_WINDOWING,
+        PREFILL_IN_KV_CACHE,
    )
 elif SYSTEM == "rocm":
    from .rocm import (
@@ -18,6 +19,7 @@ elif SYSTEM == "rocm":
        paged_attention,
        reshape_and_cache,
        SUPPORTS_WINDOWING,
+        PREFILL_IN_KV_CACHE,
    )
 elif SYSTEM == "ipex":
    from .ipex import (
@@ -25,6 +27,7 @@ elif SYSTEM == "ipex":
        paged_attention,
        reshape_and_cache,
        SUPPORTS_WINDOWING,
+        PREFILL_IN_KV_CACHE,
    )
 else:
    raise ImportError(f"System {SYSTEM} doesn't support flash/paged attention")
@@ -35,5 +38,6 @@ __all__ = [
    "paged_attention",
    "reshape_and_cache",
    "SUPPORTS_WINDOWING",
+    "PREFILL_IN_KV_CACHE",
    "Seqlen",
 ]