From 2007a9473a3cad064d3757a6d5f8c34d0d4150cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 19 Nov 2024 14:55:29 +0100 Subject: [PATCH] Update to moe-kernels 0.7.0 (#2720) This version syncs with the vLLM kernels and brings some performance improvements. --- flake.lock | 25 ++++++++++++------------- flake.nix | 2 +- server/poetry.lock | 26 +++++++++++++------------- server/pyproject.toml | 8 ++++---- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/flake.lock b/flake.lock index 148604616..ac6f6dd09 100644 --- a/flake.lock +++ b/flake.lock @@ -108,11 +108,11 @@ "pre-commit-hooks": "pre-commit-hooks_3" }, "locked": { - "lastModified": 1723311214, - "narHash": "sha256-xdGZQBEa1AC2us/sY3igS/CucWY6jErXsAvCFRhB2LI=", + "lastModified": 1730277369, + "narHash": "sha256-yvQbeJbnnwCB68yv7uZXdGb+P7NMn5JMGBw0aBHymDI=", "owner": "nix-community", "repo": "crate2nix", - "rev": "236f6addfd452a48be805819e3216af79e988fd5", + "rev": "151122427d030874ebef3517cda766a6984e6ed6", "type": "github" }, "original": { @@ -581,11 +581,11 @@ }, "nix-filter": { "locked": { - "lastModified": 1710156097, - "narHash": "sha256-1Wvk8UP7PXdf8bCCaEoMnOT1qe5/Duqgj+rL8sRQsSM=", + "lastModified": 1730207686, + "narHash": "sha256-SCHiL+1f7q9TAnxpasriP6fMarWE5H43t25F5/9e28I=", "owner": "numtide", "repo": "nix-filter", - "rev": "3342559a24e85fc164b295c3444e8a139924675b", + "rev": "776e68c1d014c3adde193a18db9d738458cd2ba4", "type": "github" }, "original": { @@ -853,11 +853,11 @@ ] }, "locked": { - "lastModified": 1729045942, - "narHash": "sha256-HjmK0x5Zm2TK2vFpC7XBM2e3EDNVnAIuEoU2FkeN8xw=", + "lastModified": 1730687492, + "narHash": "sha256-xQVadjquBA/tFxDt5A55LJ1D1AvkVWsnrKC2o+pr8F4=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "9de3cea452d2401d6f93c06ad985178a4e11d1fc", + "rev": "41814763a2c597755b0755dbe3e721367a5e420f", "type": "github" }, "original": { @@ -978,16 +978,15 @@ "nixpkgs": "nixpkgs_6" }, "locked": { - "lastModified": 1731923801, - "narHash": "sha256-SVtXtTGgnKjwPwMLe030l/DVhcm1vH4fXM7tUAPYOZc=", + "lastModified": 1732005645, + "narHash": "sha256-WbmABjHuixrYrGtiTc7cyj/EA8qta/FjRvmlU3JvKKQ=", "owner": "huggingface", "repo": "text-generation-inference-nix", - "rev": "b87d4b5bede0ffed7da50e9a5246b133c7d618dc", + "rev": "93a6aa5c029d893226880d313d24237a379b18c7", "type": "github" }, "original": { "owner": "huggingface", - "ref": "marlin-kernels-0.3.5", "repo": "text-generation-inference-nix", "type": "github" } diff --git a/flake.nix b/flake.nix index cdde7a4ca..f26a983ed 100644 --- a/flake.nix +++ b/flake.nix @@ -5,7 +5,7 @@ inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; }; nix-filter.url = "github:numtide/nix-filter"; - tgi-nix.url = "github:huggingface/text-generation-inference-nix/marlin-kernels-0.3.5"; + tgi-nix.url = "github:huggingface/text-generation-inference-nix"; nixpkgs.follows = "tgi-nix/nixpkgs"; flake-utils.url = "github:numtide/flake-utils"; rust-overlay = { diff --git a/server/poetry.lock b/server/poetry.lock index b3f75a45f..58450e778 100644 --- a/server/poetry.lock +++ b/server/poetry.lock @@ -1367,12 +1367,12 @@ files = [ [[package]] name = "moe-kernels" -version = "0.6.0" +version = "0.7.0" description = "MoE kernels" optional = true python-versions = ">=3.7" files = [ - {file = "moe_kernels-0.6.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:f28fd2a56c3ac7bfe74bc44cc7c8c0791a2644ad689b084ea4ed6decb7f41c25"}, + {file = "moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:f8c126395f11522881c6bf1f6120e3670822006a84e2ff74af561c22445746b3"}, ] [package.dependencies] @@ -1382,16 +1382,16 @@ triton = "*" [package.source] type = "url" -url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl" +url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl" [[package]] name = "moe-kernels" -version = "0.6.0" +version = "0.7.0" description = "MoE kernels" optional = true python-versions = ">=3.7" files = [ - {file = "moe_kernels-0.6.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:db475948fd9f7a8647aa3f73256ff4d3bb111425305bcd0b0d3559ccc75b8937"}, + {file = "moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:2afff8346251f01d5d90bab738e3dfaa6b14a414a9c88205d396ab2bae87983a"}, ] [package.dependencies] @@ -1401,16 +1401,16 @@ triton = "*" [package.source] type = "url" -url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl" +url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl" [[package]] name = "moe-kernels" -version = "0.6.0" +version = "0.7.0" description = "MoE kernels" optional = true python-versions = ">=3.7" files = [ - {file = "moe_kernels-0.6.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:364be07c06aafbab1f51d9e26d9a4ff658defe1462a4c645abaf7b895ed163a8"}, + {file = "moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:b1a29e33d3b7d85e2b4f8bd47db28211096d1f645e0868d5a1f3666ebb9bd9e3"}, ] [package.dependencies] @@ -1420,16 +1420,16 @@ triton = "*" [package.source] type = "url" -url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl" +url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl" [[package]] name = "moe-kernels" -version = "0.6.0" +version = "0.7.0" description = "MoE kernels" optional = true python-versions = ">=3.7" files = [ - {file = "moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:81e7fa25fb5ed5336f5151994f5e3f600df7e166fe013576968c59415e442894"}, + {file = "moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:9573611174cda9f6fafa1816521e38582fd2903b321bbaf78f83cf6e3189ac7d"}, ] [package.dependencies] @@ -1439,7 +1439,7 @@ triton = "*" [package.source] type = "url" -url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl" +url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl" [[package]] name = "mpmath" @@ -4066,4 +4066,4 @@ torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "b889115cee7f1969856f233e74721965f692e40d2a1c2fceccaf6b3bdb19680d" +content-hash = "7082f1983403ff58a1f0304e8bbf1197715b5156ddeea0f3e8287334d52c2617" diff --git a/server/pyproject.toml b/server/pyproject.toml index 194b04dae..4f6dc5a1a 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -54,10 +54,10 @@ marlin-kernels = [ { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, ] moe-kernels = [ - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, - { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, ] rich = "^13.7.1"