diff --git a/flake.lock b/flake.lock index 16e89558..14860461 100644 --- a/flake.lock +++ b/flake.lock @@ -2,10 +2,16 @@ "nodes": { "cachix": { "inputs": { - "devenv": ["crate2nix"], - "flake-compat": ["crate2nix"], + "devenv": [ + "crate2nix" + ], + "flake-compat": [ + "crate2nix" + ], "nixpkgs": "nixpkgs", - "pre-commit-hooks": ["crate2nix"] + "pre-commit-hooks": [ + "crate2nix" + ] }, "locked": { "lastModified": 1709700175, @@ -24,10 +30,19 @@ }, "cachix_2": { "inputs": { - "devenv": ["crate2nix", "crate2nix_stable"], - "flake-compat": ["crate2nix", "crate2nix_stable"], + "devenv": [ + "crate2nix", + "crate2nix_stable" + ], + "flake-compat": [ + "crate2nix", + "crate2nix_stable" + ], "nixpkgs": "nixpkgs_2", - "pre-commit-hooks": ["crate2nix", "crate2nix_stable"] + "pre-commit-hooks": [ + "crate2nix", + "crate2nix_stable" + ] }, "locked": { "lastModified": 1716549461, @@ -46,8 +61,16 @@ }, "cachix_3": { "inputs": { - "devenv": ["crate2nix", "crate2nix_stable", "crate2nix_stable"], - "flake-compat": ["crate2nix", "crate2nix_stable", "crate2nix_stable"], + "devenv": [ + "crate2nix", + "crate2nix_stable", + "crate2nix_stable" + ], + "flake-compat": [ + "crate2nix", + "crate2nix_stable", + "crate2nix_stable" + ], "nixpkgs": "nixpkgs_3", "pre-commit-hooks": [ "crate2nix", @@ -78,7 +101,10 @@ "flake-compat": "flake-compat_3", "flake-parts": "flake-parts_3", "nix-test-runner": "nix-test-runner_3", - "nixpkgs": ["tgi-nix", "nixpkgs"], + "nixpkgs": [ + "tgi-nix", + "nixpkgs" + ], "pre-commit-hooks": "pre-commit-hooks_3" }, "locked": { @@ -193,7 +219,11 @@ "devshell_2": { "inputs": { "flake-utils": "flake-utils_3", - "nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"] + "nixpkgs": [ + "crate2nix", + "crate2nix_stable", + "nixpkgs" + ] }, "locked": { "lastModified": 1717408969, @@ -212,7 +242,10 @@ "devshell_3": { "inputs": { "flake-utils": "flake-utils_4", - "nixpkgs": ["crate2nix", "nixpkgs"] + "nixpkgs": [ + "crate2nix", + "nixpkgs" + ] }, "locked": { "lastModified": 1711099426, @@ -310,7 +343,11 @@ }, "flake-parts_2": { "inputs": { - "nixpkgs-lib": ["crate2nix", "crate2nix_stable", "nixpkgs"] + "nixpkgs-lib": [ + "crate2nix", + "crate2nix_stable", + "nixpkgs" + ] }, "locked": { "lastModified": 1719745305, @@ -328,7 +365,10 @@ }, "flake-parts_3": { "inputs": { - "nixpkgs-lib": ["crate2nix", "nixpkgs"] + "nixpkgs-lib": [ + "crate2nix", + "nixpkgs" + ] }, "locked": { "lastModified": 1712014858, @@ -519,7 +559,11 @@ }, "gitignore_3": { "inputs": { - "nixpkgs": ["crate2nix", "pre-commit-hooks", "nixpkgs"] + "nixpkgs": [ + "crate2nix", + "pre-commit-hooks", + "nixpkgs" + ] }, "locked": { "lastModified": 1709087332, @@ -726,10 +770,22 @@ }, "pre-commit-hooks_2": { "inputs": { - "flake-compat": ["crate2nix", "crate2nix_stable", "flake-compat"], + "flake-compat": [ + "crate2nix", + "crate2nix_stable", + "flake-compat" + ], "gitignore": "gitignore_2", - "nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"], - "nixpkgs-stable": ["crate2nix", "crate2nix_stable", "nixpkgs"] + "nixpkgs": [ + "crate2nix", + "crate2nix_stable", + "nixpkgs" + ], + "nixpkgs-stable": [ + "crate2nix", + "crate2nix_stable", + "nixpkgs" + ] }, "locked": { "lastModified": 1719259945, @@ -747,11 +803,20 @@ }, "pre-commit-hooks_3": { "inputs": { - "flake-compat": ["crate2nix", "flake-compat"], + "flake-compat": [ + "crate2nix", + "flake-compat" + ], "flake-utils": "flake-utils_5", "gitignore": "gitignore_3", - "nixpkgs": ["crate2nix", "nixpkgs"], - "nixpkgs-stable": ["crate2nix", "nixpkgs"] + "nixpkgs": [ + "crate2nix", + "nixpkgs" + ], + "nixpkgs-stable": [ + "crate2nix", + "nixpkgs" + ] }, "locked": { "lastModified": 1712055707, @@ -772,14 +837,20 @@ "crate2nix": "crate2nix", "flake-utils": "flake-utils_6", "nix-filter": "nix-filter", - "nixpkgs": ["tgi-nix", "nixpkgs"], + "nixpkgs": [ + "tgi-nix", + "nixpkgs" + ], "rust-overlay": "rust-overlay", "tgi-nix": "tgi-nix" } }, "rust-overlay": { "inputs": { - "nixpkgs": ["tgi-nix", "nixpkgs"] + "nixpkgs": [ + "tgi-nix", + "nixpkgs" + ] }, "locked": { "lastModified": 1729045942, @@ -907,15 +978,16 @@ "nixpkgs": "nixpkgs_6" }, "locked": { - "lastModified": 1731674227, - "narHash": "sha256-k/ur37KSc+RXcwwz0tgxeamz6wQ5rsOe5hMepzIdD2s=", + "lastModified": 1731923801, + "narHash": "sha256-SVtXtTGgnKjwPwMLe030l/DVhcm1vH4fXM7tUAPYOZc=", "owner": "huggingface", "repo": "text-generation-inference-nix", - "rev": "407b9e22a0b7121bf6e171d67ce0144e3f3e39bf", + "rev": "b87d4b5bede0ffed7da50e9a5246b133c7d618dc", "type": "github" }, "original": { "owner": "huggingface", + "ref": "marlin-kernels-0.3.5", "repo": "text-generation-inference-nix", "type": "github" } diff --git a/flake.nix b/flake.nix index f26a983e..cdde7a4c 100644 --- a/flake.nix +++ b/flake.nix @@ -5,7 +5,7 @@ inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; }; nix-filter.url = "github:numtide/nix-filter"; - tgi-nix.url = "github:huggingface/text-generation-inference-nix"; + tgi-nix.url = "github:huggingface/text-generation-inference-nix/marlin-kernels-0.3.5"; nixpkgs.follows = "tgi-nix/nixpkgs"; flake-utils.url = "github:numtide/flake-utils"; rust-overlay = { diff --git a/server/poetry.lock b/server/poetry.lock index baf60805..b3f75a45 100644 --- a/server/poetry.lock +++ b/server/poetry.lock @@ -1288,12 +1288,12 @@ files = [ [[package]] name = "marlin-kernels" -version = "0.3.3" +version = "0.3.5" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:36da87e2d486083147c16845f193a438602a0dbc7a0ffb908fbec416c05c5951"}, + {file = "marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:58d4bf0aa1a9533acc05f1e5bf50f727ed0129848d1fa1feb2c5c3fa482518d4"}, ] [package.dependencies] @@ -1301,16 +1301,16 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl" [[package]] name = "marlin-kernels" -version = "0.3.3" +version = "0.3.5" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:c9ca340acdf27df009bf23ee1f37978f999b1a1378736dc3306df27eb48e364d"}, + {file = "marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:a3a3653e6908db013ca96979a5ee1f6a8bb590ee7506a129e06b87d4a8cbb87d"}, ] [package.dependencies] @@ -1318,16 +1318,16 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl" [[package]] name = "marlin-kernels" -version = "0.3.3" +version = "0.3.5" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:21e36b4880fc630882c8265e0cd27b379e40b1b87512f92a321506f4e5397d26"}, + {file = "marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:967b4765a591530a4b9160ae32f3f352a89ae4c71daf43220c99976987d76723"}, ] [package.dependencies] @@ -1335,16 +1335,16 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl" [[package]] name = "marlin-kernels" -version = "0.3.3" +version = "0.3.5" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:cdbf46e68313f76e9648ce7255353763cadbe14b7a789e01f5d502b76d64ee35"}, + {file = "marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:fbe607d5afd1e1fca6e294c3594a0ec279d1f9ea6a2fdf7f34ccb6180d15e195"}, ] [package.dependencies] @@ -1352,7 +1352,7 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl" [[package]] name = "mdurl" @@ -4066,4 +4066,4 @@ torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "05add88628d836faceae1a26fde4092651a6eca74555ae38ebff879a7895be7e" +content-hash = "b889115cee7f1969856f233e74721965f692e40d2a1c2fceccaf6b3bdb19680d" diff --git a/server/pyproject.toml b/server/pyproject.toml index bc54c367..194b04da 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -48,10 +48,10 @@ attention-kernels = [ { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, ] marlin-kernels = [ - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, ] moe-kernels = [ { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },