fix workflow

This commit is contained in:
Felix Marty 2024-06-20 09:28:10 +00:00 committed by Nicolas Patry
parent 5fb8c275c3
commit 67999773f3
No known key found for this signature in database
GPG Key ID: E939E8CC91A1C674
2 changed files with 22 additions and 4 deletions

View File

@ -172,16 +172,24 @@ jobs:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
if: needs.build-and-push.outputs.runs_on == 'amd-gpu-tgi'
container:
image: ${{ needs.build-and-push.outputs.docker_image }}
options: --shm-size "16gb" --ipc host -v ${{ needs.build-and-push.outputs.docker_volume }}:/data
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Clean Hugging Face cache
shell: bash
run: |
if [[ ${{ inputs.hardware }} == "rocm" ]]
then
python clean_cache.py
echo "pwd:"
pwd
echo "ls:"
ls
python integration-tests/clean_cache.py
fi

View File

@ -64,6 +64,9 @@ def cleanup_cache():
size_per_model[model_id] = model_size
# Sum the per-model sizes (GB) required by the CI run.
total_required_size = sum(size_per_model.values())
# Print the numeric total: applying `:.2f` to the `size_per_model` dict itself
# raises "TypeError: unsupported format string passed to dict.__format__".
print(f"Total required disk: {total_required_size:.2f} GB")
cached_dir = huggingface_hub.scan_cache_dir()
cache_size_per_model = {}
@ -86,11 +89,18 @@ def cleanup_cache():
total_required_cached_size = sum(cached_required_size_per_model.values())
total_other_cached_size = sum(cache_size_per_model.values())
total_required_size = sum(size_per_model.values())
total_non_cached_required_size = total_required_size - total_required_cached_size
print(
f"Total HF cached models size: {total_other_cached_size + total_required_cached_size:.2f} GB"
)
print(
f"Total non-necessary HF cached models size: {total_other_cached_size:.2f} GB"
)
free_memory = shutil.disk_usage("/data").free * 1e-9
print(f"Free memory: {free_memory:.2f} GB")
if free_memory + total_other_cached_size < total_non_cached_required_size * 1.05:
raise ValueError(
"Not enough space on device to execute the complete CI, please clean up the CI machine"