From 8793ae5890a0d7862489997846c9db5c746310ff Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Wed, 19 Jul 2023 01:12:28 +0200
Subject: [PATCH] add clear cache when batch is finished

---
 router/src/infer.rs                    | 1 +
 server/text_generation_server/cache.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/router/src/infer.rs b/router/src/infer.rs
index 188ddc64..395c048a 100644
--- a/router/src/infer.rs
+++ b/router/src/infer.rs
@@ -349,6 +349,7 @@ async fn batching_task(
             }
             metrics::gauge!("tgi_batch_current_size", 0.0);
             metrics::gauge!("tgi_batch_current_max_tokens", 0.0);
+            let _ = client.clear_cache(None).await;
         }
     }
 }

diff --git a/server/text_generation_server/cache.py b/server/text_generation_server/cache.py
index 4504733e..bfe042bf 100644
--- a/server/text_generation_server/cache.py
+++ b/server/text_generation_server/cache.py
@@ -29,6 +29,8 @@ class Cache:
         keys = list(self.cache.keys())
         for k in keys:
             self.delete(k)
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
 
     def __len__(self):
         return len(self.cache.keys())
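
Note: for context, the following is a minimal sketch of the pattern the
patch applies on the Python server side. It is not the actual
text_generation_server.cache module; the surrounding class body here is a
simplified stand-in, and only the two lines marked in the clear() method
correspond to what the patch adds. The idea: once every finished batch has
been dropped from the cache, ask PyTorch's CUDA caching allocator to
release its cached-but-unused blocks back to the driver.

    from typing import Dict

    import torch


    class Cache:
        """Simplified stand-in for the server-side batch cache."""

        def __init__(self):
            self.cache: Dict[int, object] = {}

        def delete(self, batch_id: int):
            # Drop the batch so the tensors it holds become unreferenced.
            batch = self.cache.pop(batch_id, None)
            if batch is not None:
                del batch

        def clear(self):
            keys = list(self.cache.keys())
            for k in keys:
                self.delete(k)
            # What the patch adds: torch.cuda.empty_cache() returns cached
            # but currently unused GPU memory to the driver. It cannot free
            # memory still held by live tensors, so it is only effective
            # after the deletions above have released the batches.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

On the router side, `let _ = client.clear_cache(None).await;` asks the
server to run this clear-all path (None meaning no specific batch id) once
the batching loop has no more work; the `let _ =` discards the Result so a
failed cleanup call can never crash the batching task.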