fix(server): empty_cache when stopped

This commit is contained in:
OlivierDehaene 2023-07-15 13:57:31 +02:00
parent c58a0c185b
commit a2cf1bdb2f

View File

@@ -991,6 +991,7 @@ class FlashCausalLM(Model):
         if stopped:
             del batch
+            torch.cuda.empty_cache()
             # No need to return a batch if we know that all requests stopped
             return generations, None