From eb3df0f46fd1d0e42188fd14124f255a2a7df199 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 28 Jan 2025 10:30:28 +0100 Subject: [PATCH] Fix a possible OOM with the 2.5.1 change. (#2958) --- server/text_generation_server/models/globals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py index 889de028..19696372 100644 --- a/server/text_generation_server/models/globals.py +++ b/server/text_generation_server/models/globals.py @@ -28,7 +28,7 @@ if PREFIX_CACHING and ATTENTION not in { raise RuntimeError("Prefix caching is only supported with flashinfer") MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None -TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.95")) +TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.93")) assert TGI_WIGGLE_ROOM > 0 assert TGI_WIGGLE_ROOM < 1