chunking by default.

2025-09-11 20:34:54 +00:00 · 2024-12-02 07:00:03 +01:00 · 2024-12-02 07:00:03 +01:00 · db1114955a
commit db1114955a
parent 9fab7c6665
1 changed files with 1 additions and 1 deletions
--- a/server/text_generation_server/models/globals.py
+++ b/server/text_generation_server/models/globals.py
@@ -12,7 +12,7 @@ PREFIX_CACHING = os.environ["PREFIX_CACHING"].lower() in {
    "1",
    "true",
 }
-PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "0").lower() in {"1", "true"}
+PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "1").lower() in {"1", "true"}
 log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
 _expected = {"paged", "flashdecoding", "flashinfer"}
 assert (