Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-21 23:12:07 +00:00
feat: add workflow to build flash_attn
commit 7d1651f4a2 (parent 38688ba45d)
.github/workflows/python-packaging.yaml (new file, vendored, 193 lines)
@@ -0,0 +1,193 @@
# based on https://github.com/Dao-AILab/flash-attention/blob/main/.github/workflows/publish.yml

name: Build wheels and deploy

on:
  create:
    tags:
      - v*

jobs:
  setup_release:
    name: Create Release
    runs-on: ubuntu-latest
    steps:
      - name: Get the tag version
        id: extract_branch
        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
        shell: bash

      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.extract_branch.outputs.branch }}
          release_name: ${{ steps.extract_branch.outputs.branch }}

  build_wheels:
    name: Build Wheel
    needs: setup_release
    runs-on: ${{ matrix.os }}

    strategy:
      fail-fast: false
      matrix:
        # Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
        # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
        os: [ubuntu-20.04]
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
        torch-version: ["1.12.1", "1.13.1", "2.0.1", "2.1.2", "2.2.2", "2.3.0"]
        cuda-version: ["11.8.0", "12.2.2"]
        # We need separate wheels that either use the C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
        # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with the C++11 ABI.
        # Without this we get an import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
        # when building without the C++11 ABI and using it on nvcr images.
        cxx11_abi: ["FALSE", "TRUE"]
        exclude:
          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
          # Pytorch <= 1.12 does not support Python 3.11
          - torch-version: "1.12.1"
            python-version: "3.11"
          # Pytorch >= 2.0 only supports Python >= 3.8
          - torch-version: "2.0.1"
            python-version: "3.7"
          - torch-version: "2.1.2"
            python-version: "3.7"
          - torch-version: "2.2.2"
            python-version: "3.7"
          - torch-version: "2.3.0"
            python-version: "3.7"
          # Pytorch <= 2.0 only supports CUDA <= 11.8
          - torch-version: "1.12.1"
            cuda-version: "12.2.2"
          - torch-version: "1.13.1"
            cuda-version: "12.2.2"
          - torch-version: "2.0.1"
            cuda-version: "12.2.2"

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set CUDA and PyTorch versions
        run: |
          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
          echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV

      - name: Free up disk space
        if: ${{ runner.os == 'Linux' }}
        # https://github.com/easimon/maximize-build-space/blob/master/action.yml
        # https://github.com/easimon/maximize-build-space/tree/test-report
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL

      - name: Set up swap space
        if: runner.os == 'Linux'
        uses: pierotofy/set-swap-space@v1.0
        with:
          swap-size-gb: 10

      - name: Install CUDA ${{ matrix.cuda-version }}
        if: ${{ matrix.cuda-version != 'cpu' }}
        uses: Jimver/cuda-toolkit@v0.2.14
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda-version }}
          linux-local-args: '["--toolkit"]'
          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
          method: "network"
          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
          # not just nvcc
          # sub-packages: '["nvcc"]'

      - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
        run: |
          pip install --upgrade pip
          # If we don't install lit before installing Pytorch, we get an error for torch 2.0.1:
          # ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
          pip install lit
          # We want to figure out which CUDA version to download pytorch for,
          # e.g. the system CUDA version may be 11.7, but if torch==1.12 then we need to download the wheel from cu116.
          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
          # This code is ugly, maybe there's a better way to do this.
          export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
            minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 118, '2.2': 118, '2.3': 118}[env['MATRIX_TORCH_VERSION']]; \
            maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121, '2.2': 121, '2.3': 121}[env['MATRIX_TORCH_VERSION']]; \
            print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \
          )
          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
            if [[ ${MATRIX_TORCH_VERSION} == "2.2" ]]; then
              # --no-deps because we can't install old versions of pytorch-triton
              pip install typing-extensions jinja2
              pip install --no-cache-dir --no-deps --pre https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}/torch-${{ matrix.torch-version }}%2Bcu${TORCH_CUDA_VERSION}-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
            else
              pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
            fi
          else
            pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
          fi
          nvcc --version
          python --version
          python -c "import torch; print('PyTorch:', torch.__version__)"
          python -c "import torch; print('CUDA:', torch.version.cuda)"
          python -c "from torch.utils import cpp_extension; print(cpp_extension.CUDA_HOME)"
        shell: bash

      - name: Build wheel
        run: |
          # Ensure we have the correct version of setuptools to avoid compatibility issues with CUDA
          pip install setuptools==68.0.0
          pip install ninja packaging wheel
          # Set up the environment for CUDA
          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
          # Clone the repository at the specific commit
          git clone https://github.com/HazyResearch/flash-attention.git
          cd flash-attention
          git checkout 3a9bfd076f98746c73362328958dbc68d145fbec
          # Build the wheel with limited jobs to prevent OOM issues on the GitHub runner
          MAX_JOBS=2 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi }} python setup.py bdist_wheel --dist-dir=dist
          # Generate a custom name for the wheel to include CUDA and Torch versions
          tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
          wheel_name=$(ls dist/*.whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
          # Rename the wheel with the custom name
          ls dist/*.whl | xargs -I {} mv {} dist/${wheel_name}
          # Save the wheel name to the GitHub environment
          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV

      - name: Log Built Wheels
        run: |
          ls flash-attention/dist

      - name: Get the tag version
        id: extract_branch
        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}

      - name: Get Release with tag
        id: get_current_release
        uses: joutvhu/get-release@v1
        with:
          tag_name: ${{ steps.extract_branch.outputs.branch }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Upload Release Asset
        id: upload_release_asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
          asset_path: ./flash-attention/dist/${{env.wheel_name}}
          asset_name: ${{env.wheel_name}}
          asset_content_type: application/*
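
A note for reviewers on the trickiest part of the workflow: the inline python -c snippet in the "Install PyTorch" step clamps the installed CUDA version into the range of CUDA builds actually published for the selected torch release, and that result picks the download.pytorch.org index. Below is a standalone sketch of that logic using the same lookup tables as the workflow; the helper name and example calls are illustrative only.

# Sketch of the TORCH_CUDA_VERSION selection done inline in the workflow.
# MATRIX_TORCH_VERSION is torch major.minor (e.g. "2.1"); MATRIX_CUDA_VERSION is
# the CUDA major+minor with the dot dropped (e.g. "11.8.0" -> 118), matching the
# awk commands in the "Set CUDA and PyTorch versions" step.
MIN_CUDA = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 118, '2.2': 118, '2.3': 118}
MAX_CUDA = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121, '2.2': 121, '2.3': 121}

def torch_cuda_tag(torch_version: str, cuda_version: str) -> int:
    """Clamp the system CUDA tag into the range published for this torch release."""
    torch_mm = ".".join(torch_version.split(".")[:2])     # "2.1.2" -> "2.1"
    cuda_tag = int("".join(cuda_version.split(".")[:2]))  # "11.8.0" -> 118
    return max(min(cuda_tag, MAX_CUDA[torch_mm]), MIN_CUDA[torch_mm])

# The example from the workflow comment: a CUDA 11.7 toolchain with torch 1.12
# still installs from the cu116 index; a matrix entry like torch 2.1.2 + CUDA 11.8.0
# resolves to cu118.
print(torch_cuda_tag("1.12.1", "11.7.0"))  # 116
print(torch_cuda_tag("2.1.2", "11.8.0"))   # 118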
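
The wheel-renaming one-liner in the "Build wheel" step, sed "s/-/+$tmpname-/2", is also easier to read spelled out: it replaces the second "-" in the wheel filename, inserting a local version tag that encodes the CUDA, torch, and C++11-ABI flavor right after the package version, and the renamed file becomes the release asset. A small sketch, with a hypothetical wheel filename:

# Mirror of the sed expression used to rename the built wheel. The input filename
# and version below are hypothetical; the tag components come from the MATRIX_*
# variables and the cxx11_abi matrix value.
def rename_wheel(wheel: str, cuda: str, torch: str, cxx11_abi: str) -> str:
    tag = f"cu{cuda}torch{torch}cxx11abi{cxx11_abi}"
    parts = wheel.split("-")
    # Re-join with "+tag" inserted at the second "-", like sed "s/-/+tag-/2".
    return f"{parts[0]}-{parts[1]}+{tag}-" + "-".join(parts[2:])

print(rename_wheel("flash_attn-2.0.0-cp310-cp310-linux_x86_64.whl",
                   cuda="118", torch="2.1", cxx11_abi="FALSE"))
# flash_attn-2.0.0+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl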