From 57570bf59815f34b556d5d2858e7b781c519d6e7 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 27 Jan 2025 14:35:00 +0100 Subject: [PATCH] Fix a possible OOM caused by the 2.5.1 change. --- server/text_generation_server/models/globals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py index 889de028..19696372 100644 --- a/server/text_generation_server/models/globals.py +++ b/server/text_generation_server/models/globals.py @@ -28,7 +28,7 @@ if PREFIX_CACHING and ATTENTION not in { raise RuntimeError("Prefix caching is only supported with flashinfer") MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None -TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.95")) +TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.93")) assert TGI_WIGGLE_ROOM > 0 assert TGI_WIGGLE_ROOM < 1