Fixing the OOM, maybe with the 2.5.1 change.

This commit is contained in:
Nicolas Patry 2025-01-27 14:35:00 +01:00
parent 40b00275b2
commit 57570bf598
No known key found for this signature in database
GPG Key ID: D2920555C90F704C

View File

@@ -28,7 +28,7 @@ if PREFIX_CACHING and ATTENTION not in {
     raise RuntimeError("Prefix caching is only supported with flashinfer")
 MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
-TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.95"))
+TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.93"))
 assert TGI_WIGGLE_ROOM > 0
 assert TGI_WIGGLE_ROOM < 1