mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
reset peak memory
This commit is contained in:
parent
99568eef7b
commit
05d2a77e4c
4
.github/workflows/build.yaml
vendored
4
.github/workflows/build.yaml
vendored
@@ -214,10 +214,6 @@ jobs:
|
||||
- name: Install
|
||||
run: |
|
||||
make install-integration-tests
|
||||
- name: Setup tmate session
|
||||
uses: mxschmitt/action-tmate@v3
|
||||
with:
|
||||
limit-access-to-actor: true
|
||||
- name: Run tests
|
||||
run: |
|
||||
export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
|
||||
|
@@ -714,6 +714,7 @@ class FlashCausalLM(Model):
|
||||
global CACHE_MANAGER
|
||||
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.reset_peak_memory_stats(self.device)
|
||||
try:
|
||||
CACHE_MANAGER = CacheManager(
|
||||
batch.blocks,
|
||||
|
Loading…
Reference in New Issue
Block a user