fix workflow

2025-09-10 20:04:52 +00:00 · 2024-06-20 09:28:10 +00:00 · 2024-06-20 09:28:10 +00:00 · 67999773f3
commit 67999773f3
parent 5fb8c275c3
2 changed files with 22 additions and 4 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -172,16 +172,24 @@ jobs:
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
-    if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
+    if: needs.build-and-push.outputs.runs_on == 'amd-gpu-tgi'
    container:
      image: ${{ needs.build-and-push.outputs.docker_image }}
      options: --shm-size "16gb" --ipc host -v ${{ needs.build-and-push.outputs.docker_volume }}:/data
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Clean Hugging Face cache
        shell: bash
        run: |
          if [[ ${{ inputs.hardware }} == "rocm" ]]
          then
-            python clean_cache.py
+            echo "pwd:"
            pwd
            echo "ls:"
            ls
            python integration-tests/clean_cache.py
          fi
--- a/integration-tests/clean_cache.py
+++ b/integration-tests/clean_cache.py
@@ -64,6 +64,9 @@ def cleanup_cache():
        size_per_model[model_id] = model_size
    total_required_size = sum(size_per_model.values())
    print(f"Total required disk: {size_per_model:.2f} GB")
    cached_dir = huggingface_hub.scan_cache_dir()
    cache_size_per_model = {}
@@ -86,11 +89,18 @@ def cleanup_cache():
    total_required_cached_size = sum(cached_required_size_per_model.values())
    total_other_cached_size = sum(cache_size_per_model.values())
    total_required_size = sum(size_per_model.values())
    total_non_cached_required_size = total_required_size - total_required_cached_size
    print(
        f"Total HF cached models size: {total_other_cached_size + total_required_cached_size:.2f} GB"
    )
    print(
        f"Total non-necessary HF cached models size: {total_other_cached_size:.2f} GB"
    )
    free_memory = shutil.disk_usage("/data").free * 1e-9
    print(f"Free memory: {free_memory:.2f} GB")
    if free_memory + total_other_cached_size < total_non_cached_required_size * 1.05:
        raise ValueError(
            "Not enough space on device to execute the complete CI, please clean up the CI machine"