reset peak memory

2025-09-11 04:14:52 +00:00 · 2023-07-19 00:17:49 +02:00 · 2023-07-19 00:17:49 +02:00 · 05d2a77e4c
commit 05d2a77e4c
parent 99568eef7b
2 changed files with 1 addition and 4 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -214,10 +214,6 @@ jobs:
      - name: Install
        run: |
          make install-integration-tests
-      - name: Setup tmate session
-        uses: mxschmitt/action-tmate@v3
-        with:
-          limit-access-to-actor: true
      - name: Run tests
        run: |
          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -714,6 +714,7 @@ class FlashCausalLM(Model):
        global CACHE_MANAGER

        torch.cuda.empty_cache()
+        torch.cuda.reset_peak_memory_stats(self.device)
        try:
            CACHE_MANAGER = CacheManager(
                batch.blocks,