From ee0dffcd14e2569c9b544eb7f2ce66c193f0bfe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Wed, 29 Jan 2025 18:19:55 +0100 Subject: [PATCH] Update to moe-kernels 0.8.0 (#2966) --- flake.lock | 8 ++-- flake.nix | 2 +- server/pyproject.toml | 7 +-- server/uv.lock | 104 ++---------------------------------------- 4 files changed, 11 insertions(+), 110 deletions(-) diff --git a/flake.lock b/flake.lock index 6bf4ba2f..4779427f 100644 --- a/flake.lock +++ b/flake.lock @@ -978,16 +978,16 @@ "nixpkgs": "nixpkgs_6" }, "locked": { - "lastModified": 1737715219, - "narHash": "sha256-oIxoNreSeSILjWxcZHXW3cdcoNQHnXO5deXoIiC1tng=", + "lastModified": 1738163501, + "narHash": "sha256-MW+HVo3Kjr/W8ra7qyeG2nW/Z6fsZ7nDfWs3Uvw9Xko=", "owner": "huggingface", "repo": "text-generation-inference-nix", - "rev": "b91a56628f446c6cb79d224f17c1c66fe1a260f6", + "rev": "bfdd9594c7d99cf8442e06f3bb2b4ab08185affe", "type": "github" }, "original": { "owner": "huggingface", - "ref": "attention-kernels-0.2.0", + "ref": "moe-kernels-0.8.0", "repo": "text-generation-inference-nix", "type": "github" } diff --git a/flake.nix b/flake.nix index 883bae91..d8a8a6cf 100644 --- a/flake.nix +++ b/flake.nix @@ -5,7 +5,7 @@ inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; }; nix-filter.url = "github:numtide/nix-filter"; - tgi-nix.url = "github:huggingface/text-generation-inference-nix/attention-kernels-0.2.0"; + tgi-nix.url = "github:huggingface/text-generation-inference-nix/moe-kernels-0.8.0"; nixpkgs.follows = "tgi-nix/nixpkgs"; flake-utils.url = "github:numtide/flake-utils"; rust-overlay = { diff --git a/server/pyproject.toml b/server/pyproject.toml index dbc1fa7a..ceedc3bd 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -75,12 +75,7 @@ marlin-kernels = [ { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp311-cp311-linux_x86_64.whl", marker = "python_version == '3.11'" }, { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp312-cp312-linux_x86_64.whl", marker = "python_version == '3.12'" }, ] -moe-kernels = [ - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp39-cp39-linux_x86_64.whl", marker = "python_version == '3.9'" }, - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp310-cp310-linux_x86_64.whl", marker = "python_version == '3.10'" }, - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp311-cp311-linux_x86_64.whl", marker = "python_version == '3.11'" }, - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp312-cp312-linux_x86_64.whl", marker = "python_version == '3.12'" }, -] +moe-kernels.url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.0/moe_kernels-0.8.0+cu123torch2.5-cp39-abi3-linux_x86_64.whl" [tool.pytest.ini_options] markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"] diff --git a/server/uv.lock b/server/uv.lock index ef26b463..5684d581 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -997,101 +997,15 @@ wheels = [ [[package]] name = "moe-kernels" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.13'", -] +version = "0.8.0" +source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.0/moe_kernels-0.8.0+cu123torch2.5-cp39-abi3-linux_x86_64.whl" } dependencies = [ - { name = "nvidia-ml-py", marker = "python_full_version >= '3.13'" }, - { name = "torch", marker = "python_full_version >= '3.13'" }, - { name = "triton", marker = "python_full_version >= '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9c/a1/76f32a7ce5b18e5b841e64e0e2631fa2b94f432743d6ab76b76fb24fe961/moe-kernels-0.7.0.tar.gz", hash = "sha256:1f564affad32077fe17c24566bd6200e38087e90b248636bab55fac2e1ac6874", size = 23629 } - -[[package]] -name = "moe-kernels" -version = "0.7.0" -source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp310-cp310-linux_x86_64.whl" } -resolution-markers = [ - "python_full_version == '3.10.*'", -] -dependencies = [ - { name = "nvidia-ml-py", marker = "python_full_version == '3.10.*'" }, - { name = "torch", marker = "python_full_version == '3.10.*'" }, - { name = "triton", marker = "python_full_version == '3.10.*'" }, -] -wheels = [ - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp310-cp310-linux_x86_64.whl", hash = "sha256:242d5de087902aa84dff54b6ba140b4066904fe5e7757f934645343b052ab076" }, -] - -[package.metadata] -requires-dist = [ { name = "nvidia-ml-py" }, { name = "torch" }, { name = "triton" }, ] - -[[package]] -name = "moe-kernels" -version = "0.7.0" -source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp311-cp311-linux_x86_64.whl" } -resolution-markers = [ - "python_full_version == '3.11.*'", -] -dependencies = [ - { name = "nvidia-ml-py", marker = "python_full_version == '3.11.*'" }, - { name = "torch", marker = "python_full_version == '3.11.*'" }, - { name = "triton", marker = "python_full_version == '3.11.*'" }, -] wheels = [ - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp311-cp311-linux_x86_64.whl", hash = "sha256:8198a3388a03a3248d5f5698097e8ce0a73b6a01f9854fc2338aacc57e554e8a" }, -] - -[package.metadata] -requires-dist = [ - { name = "nvidia-ml-py" }, - { name = "torch" }, - { name = "triton" }, -] - -[[package]] -name = "moe-kernels" -version = "0.7.0" -source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp312-cp312-linux_x86_64.whl" } -resolution-markers = [ - "python_full_version == '3.12.*'", -] -dependencies = [ - { name = "nvidia-ml-py", marker = "python_full_version == '3.12.*'" }, - { name = "torch", marker = "python_full_version == '3.12.*'" }, - { name = "triton", marker = "python_full_version == '3.12.*'" }, -] -wheels = [ - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp312-cp312-linux_x86_64.whl", hash = "sha256:b35fb02ae560b560f4af107791a3308dc97d5ca57d39bab20acec3a0f082ccf2" }, -] - -[package.metadata] -requires-dist = [ - { name = "nvidia-ml-py" }, - { name = "torch" }, - { name = "triton" }, -] - -[[package]] -name = "moe-kernels" -version = "0.7.0" -source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp39-cp39-linux_x86_64.whl" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "nvidia-ml-py", marker = "python_full_version < '3.10'" }, - { name = "torch", marker = "python_full_version < '3.10'" }, - { name = "triton", marker = "python_full_version < '3.10'" }, -] -wheels = [ - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp39-cp39-linux_x86_64.whl", hash = "sha256:cf8d276deb7a4d40fed3eb02e1b6f8d08ccec0f4256260922af13927ca044f56" }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.0/moe_kernels-0.8.0+cu123torch2.5-cp39-abi3-linux_x86_64.whl", hash = "sha256:92c4e083c037a325458e731dda6770790495cab273c9bbf5f50fb8e262c099de" }, ] [package.metadata] @@ -2798,11 +2712,7 @@ marlin = [ { name = "marlin-kernels", version = "0.3.7", source = { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp39-cp39-linux_x86_64.whl" }, marker = "python_full_version < '3.10'" }, ] moe = [ - { name = "moe-kernels", version = "0.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, - { name = "moe-kernels", version = "0.7.0", source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp310-cp310-linux_x86_64.whl" }, marker = "python_full_version == '3.10.*'" }, - { name = "moe-kernels", version = "0.7.0", source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp311-cp311-linux_x86_64.whl" }, marker = "python_full_version == '3.11.*'" }, - { name = "moe-kernels", version = "0.7.0", source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*'" }, - { name = "moe-kernels", version = "0.7.0", source = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp39-cp39-linux_x86_64.whl" }, marker = "python_full_version < '3.10'" }, + { name = "moe-kernels" }, ] outlines = [ { name = "outlines" }, @@ -2836,11 +2746,7 @@ requires-dist = [ { name = "marlin-kernels", marker = "python_full_version == '3.10.*' and extra == 'marlin'", url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp310-cp310-linux_x86_64.whl" }, { name = "marlin-kernels", marker = "python_full_version == '3.11.*' and extra == 'marlin'", url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp311-cp311-linux_x86_64.whl" }, { name = "marlin-kernels", marker = "python_full_version == '3.12.*' and extra == 'marlin'", url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp312-cp312-linux_x86_64.whl" }, - { name = "moe-kernels", marker = "python_full_version == '3.9.*' and extra == 'moe'", url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp39-cp39-linux_x86_64.whl" }, - { name = "moe-kernels", marker = "(python_full_version < '3.9' and extra == 'moe') or (python_full_version >= '3.13' and extra == 'moe')" }, - { name = "moe-kernels", marker = "python_full_version == '3.10.*' and extra == 'moe'", url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp310-cp310-linux_x86_64.whl" }, - { name = "moe-kernels", marker = "python_full_version == '3.11.*' and extra == 'moe'", url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp311-cp311-linux_x86_64.whl" }, - { name = "moe-kernels", marker = "python_full_version == '3.12.*' and extra == 'moe'", url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.5-cp312-cp312-linux_x86_64.whl" }, + { name = "moe-kernels", marker = "extra == 'moe'", url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.0/moe_kernels-0.8.0+cu123torch2.5-cp39-abi3-linux_x86_64.whl" }, { name = "mypy-protobuf", marker = "extra == 'gen'", specifier = ">=3.6.0" }, { name = "numpy", specifier = ">=1.26,<3" }, { name = "opentelemetry-api", specifier = ">=1.27.0" },