mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00

add clear cache when batch is finished

parent 0111869ad0
commit 8793ae5890
@@ -349,6 +349,7 @@ async fn batching_task(
             }
             metrics::gauge!("tgi_batch_current_size", 0.0);
             metrics::gauge!("tgi_batch_current_max_tokens", 0.0);
+            let _ = client.clear_cache(None).await;
         }
     }
 }
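The added router line fires a single clear_cache RPC at the point where the in-flight batch has fully completed (both batch gauges have just been reset to zero), so the Python shards drop any state left over from the finished batch; the `let _ =` deliberately ignores the result, so a failed cache clear cannot bring down the batching task. For context, here is a minimal self-contained sketch of the server-side Cache that receives this call, reconstructed from the diff context in the next hunk rather than copied from the repository (the set/delete bodies are assumptions):

import torch

class Cache:
    """Maps batch ids to cached batch state between decode steps (sketch)."""

    def __init__(self):
        self.cache = {}

    def set(self, batch):
        # Assumed shape: real batches carry a batch_id attribute.
        self.cache[batch.batch_id] = batch

    def delete(self, batch_id):
        # pop() tolerates ids that were already evicted.
        self.cache.pop(batch_id, None)

    def clear(self):
        keys = list(self.cache.keys())
        for k in keys:
            self.delete(k)
        # New in this commit: also release CUDA allocator blocks (next hunk).
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def __len__(self):
        return len(self.cache.keys())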
@@ -29,6 +29,8 @@ class Cache:
         keys = list(self.cache.keys())
         for k in keys:
             self.delete(k)
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
 
     def __len__(self):
         return len(self.cache.keys())
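torch.cuda.empty_cache() returns memory blocks that PyTorch's caching allocator is holding but no live tensor uses back to the driver, so other processes (or a later, larger batch) can allocate them; it does not free tensors that are still referenced. The is_available() guard keeps CPU-only deployments working unchanged. A small runnable demo of the same guard pattern, independent of TGI:

import torch

def release_cuda_memory():
    if torch.cuda.is_available():
        before = torch.cuda.memory_reserved()
        torch.cuda.empty_cache()  # hand cached, unused blocks back to the driver
        after = torch.cuda.memory_reserved()
        print(f"reserved bytes: {before} -> {after}")
    else:
        print("no CUDA device; nothing to release")

if __name__ == "__main__":
    release_cuda_memory()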