Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-21 06:42:10 +00:00.
fix(server): empty_cache when stopped
This commit is contained in:
parent
c58a0c185b
commit
a2cf1bdb2f
@@ -991,6 +991,7 @@ class FlashCausalLM(Model):

         if stopped:
             del batch
+            torch.cuda.empty_cache()
             # No need to return a batch if we know that all requests stopped
             return generations, None
Loading…
Reference in New Issue
Block a user