Fixing the OOM, maybe with the 2.5.1 change. (#2958)

This commit is contained in:
Nicolas Patry 2025-01-28 10:30:28 +01:00 committed by GitHub
parent c690da5973
commit eb3df0f46f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -28,7 +28,7 @@ if PREFIX_CACHING and ATTENTION not in {
raise RuntimeError("Prefix caching is only supported with flashinfer")
# CUDA graph pool handle obtained from torch when CUDA is available; otherwise None.
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
# TGI_WIGGLE_ROOM is read from the environment variable of the same name and parsed as a float.
# NOTE(review): the two assignments below look like the removed/added pair of a diff hunk
# (default lowered from "0.95" to "0.93"); if both execute, the second one wins.
# Presumably this is a safety-margin fraction applied to memory sizing -- confirm against its usage.
TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.95"))
TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.93"))
# The value must lie strictly inside the open interval (0, 1).
# Note these checks are `assert` statements, so they are skipped when Python runs with -O.
assert TGI_WIGGLE_ROOM > 0
assert TGI_WIGGLE_ROOM < 1