mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-05-16 15:12:09 +00:00
* Use Hub kernels for Marlin and cutlass quantization kernels
* Use hub kernels for MoE/GPTQ-Marlin MoE
* Use attention kernels from the Hub
* Cache the kernels in the Docker image
* Update moe kernels
* Support loading local kernels for development
* Support latest moe kernels
* Update to moe 0.1.1
* CI: download locked kernels for server tests
* Fixup some imports
* CI: activate venv
* Fix unused imports
* Nix: add attention/moe/quantization kernels
* Update hf-kernels to 0.1.5
* Update kernels
* Update tgi-nix flake for hf-kernels
* Fix EOF
* Take `load_kernel` out of a frequently-called function
* Hoist another case of kernel loading out of a somewhat hot function
* marlin-kernels -> quantization
* attention -> paged-attention
* EOF fix
* Update hf-kernels, fixup Docker
* ipex fix
* Remove outdated TODO
72 lines
2.0 KiB
YAML
72 lines
2.0 KiB
YAML
name: Server Tests

# Trigger: pull requests only, and only when one of the listed paths changes.
on:
  pull_request:
    paths:
      # The workflow definition itself.
      - '.github/workflows/tests.yaml'
      # Source trees exercised by the steps of this workflow.
      - 'server/**'
      - 'proto/**'
      - 'router/**'
      - 'launcher/**'
      # Rust dependency lock file and toolchain pin.
      - 'Cargo.lock'
      - 'rust-toolchain.toml'
|
|
|
|
# Runs are grouped by workflow name plus the PR head ref; when the head ref is
# empty the run id is used instead, which gives that run a group of its own.
# A newer run in the same group cancels the one still in progress.
concurrency:
  group: "${{ github.workflow }}-${{ github.head_ref || github.run_id }}"
  cancel-in-progress: true
|
|
|
|
jobs:
  # Single job: sets up Python/Rust/protoc, installs the CPU build of the
  # server into a uv-managed virtualenv, downloads the locked kernels, then
  # runs the Python server tests, the pre-commit hooks and the Rust tests.
  run_tests:
    runs-on:
      group: aws-highmemory-32-plus-priv
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v4
        id: python
        with:
          # Quoted so the version stays a string; an unquoted version such as
          # 3.10 would be parsed as the float 3.1.
          python-version: "3.11"
      - name: Install Rust
        uses: actions-rs/toolchain@v1
        with:
          # Released on: 09 January, 2025
          # https://releases.rs/docs/1.84.0/
          toolchain: 1.84.0
          override: true
          components: rustfmt, clippy
      - name: Install Protoc
        uses: arduino/setup-protoc@v1
      - name: Clean unused files
        run: |
          sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
          sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
      - name: Install
        run: |
          sudo apt update
          sudo apt install python3.11-dev -y
          pip install -U pip uv
          uv venv
          source ./.venv/bin/activate
          make install-cpu
      - name: Download locked kernels
        run: |
          source ./.venv/bin/activate
          hf-kernels download server
      - name: Run server tests
        # The secret is handed to the step through the environment instead of
        # being interpolated into the script text, so its value is never
        # parsed by the shell as part of the command line.
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          source ./.venv/bin/activate
          uv pip install pytest
          pytest -s -vv server/tests
      - name: Pre-commit checks
        run: |
          pip install pre-commit
          pre-commit install
          pre-commit run --all-files
      - name: Run Rust tests
        run: |
          cargo test
      - name: Run Rust tests with google feature
        run: |
          cargo test --features google
|