Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-10 11:54:52 +00:00.
Add cache clearing when a batch is finished
This commit is contained in:
parent
0111869ad0
commit
8793ae5890
@ -349,6 +349,7 @@ async fn batching_task(
|
||||
}
|
||||
metrics::gauge!("tgi_batch_current_size", 0.0);
|
||||
metrics::gauge!("tgi_batch_current_max_tokens", 0.0);
|
||||
let _ = client.clear_cache(None).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -29,6 +29,8 @@ class Cache:
|
||||
keys = list(self.cache.keys())
|
||||
for k in keys:
|
||||
self.delete(k)
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
def __len__(self):
|
||||
return len(self.cache.keys())
|
||||
|
Loading…
Reference in New Issue
Block a user