Fixing the OOM, maybe with the 2.5.1 change. (#2958)

2025-06-11 20:02:07 +00:00 · 2025-01-28 10:30:28 +01:00 · 2025-01-28 10:30:28 +01:00 · eb3df0f46f
commit eb3df0f46f
parent c690da5973
1 changed file with 1 addition and 1 deletion
--- a/server/text_generation_server/models/globals.py
+++ b/server/text_generation_server/models/globals.py
@@ -28,7 +28,7 @@ if PREFIX_CACHING and ATTENTION not in {
    raise RuntimeError("Prefix caching is only supported with flashinfer")

 MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
-TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.95"))
+TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.93"))
 assert TGI_WIGGLE_ROOM > 0
 assert TGI_WIGGLE_ROOM < 1