Use marlin-kernels 0.3.5

This commit is contained in:
Daniël de Kok 2024-11-18 12:43:12 +00:00
parent f76c0ff17f
commit e0018723fc
4 changed files with 115 additions and 43 deletions

View File

@ -2,10 +2,16 @@
"nodes": { "nodes": {
"cachix": { "cachix": {
"inputs": { "inputs": {
"devenv": ["crate2nix"], "devenv": [
"flake-compat": ["crate2nix"], "crate2nix"
],
"flake-compat": [
"crate2nix"
],
"nixpkgs": "nixpkgs", "nixpkgs": "nixpkgs",
"pre-commit-hooks": ["crate2nix"] "pre-commit-hooks": [
"crate2nix"
]
}, },
"locked": { "locked": {
"lastModified": 1709700175, "lastModified": 1709700175,
@ -24,10 +30,19 @@
}, },
"cachix_2": { "cachix_2": {
"inputs": { "inputs": {
"devenv": ["crate2nix", "crate2nix_stable"], "devenv": [
"flake-compat": ["crate2nix", "crate2nix_stable"], "crate2nix",
"crate2nix_stable"
],
"flake-compat": [
"crate2nix",
"crate2nix_stable"
],
"nixpkgs": "nixpkgs_2", "nixpkgs": "nixpkgs_2",
"pre-commit-hooks": ["crate2nix", "crate2nix_stable"] "pre-commit-hooks": [
"crate2nix",
"crate2nix_stable"
]
}, },
"locked": { "locked": {
"lastModified": 1716549461, "lastModified": 1716549461,
@ -46,8 +61,16 @@
}, },
"cachix_3": { "cachix_3": {
"inputs": { "inputs": {
"devenv": ["crate2nix", "crate2nix_stable", "crate2nix_stable"], "devenv": [
"flake-compat": ["crate2nix", "crate2nix_stable", "crate2nix_stable"], "crate2nix",
"crate2nix_stable",
"crate2nix_stable"
],
"flake-compat": [
"crate2nix",
"crate2nix_stable",
"crate2nix_stable"
],
"nixpkgs": "nixpkgs_3", "nixpkgs": "nixpkgs_3",
"pre-commit-hooks": [ "pre-commit-hooks": [
"crate2nix", "crate2nix",
@ -78,7 +101,10 @@
"flake-compat": "flake-compat_3", "flake-compat": "flake-compat_3",
"flake-parts": "flake-parts_3", "flake-parts": "flake-parts_3",
"nix-test-runner": "nix-test-runner_3", "nix-test-runner": "nix-test-runner_3",
"nixpkgs": ["tgi-nix", "nixpkgs"], "nixpkgs": [
"tgi-nix",
"nixpkgs"
],
"pre-commit-hooks": "pre-commit-hooks_3" "pre-commit-hooks": "pre-commit-hooks_3"
}, },
"locked": { "locked": {
@ -193,7 +219,11 @@
"devshell_2": { "devshell_2": {
"inputs": { "inputs": {
"flake-utils": "flake-utils_3", "flake-utils": "flake-utils_3",
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"] "nixpkgs": [
"crate2nix",
"crate2nix_stable",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1717408969, "lastModified": 1717408969,
@ -212,7 +242,10 @@
"devshell_3": { "devshell_3": {
"inputs": { "inputs": {
"flake-utils": "flake-utils_4", "flake-utils": "flake-utils_4",
"nixpkgs": ["crate2nix", "nixpkgs"] "nixpkgs": [
"crate2nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1711099426, "lastModified": 1711099426,
@ -310,7 +343,11 @@
}, },
"flake-parts_2": { "flake-parts_2": {
"inputs": { "inputs": {
"nixpkgs-lib": ["crate2nix", "crate2nix_stable", "nixpkgs"] "nixpkgs-lib": [
"crate2nix",
"crate2nix_stable",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1719745305, "lastModified": 1719745305,
@ -328,7 +365,10 @@
}, },
"flake-parts_3": { "flake-parts_3": {
"inputs": { "inputs": {
"nixpkgs-lib": ["crate2nix", "nixpkgs"] "nixpkgs-lib": [
"crate2nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1712014858, "lastModified": 1712014858,
@ -519,7 +559,11 @@
}, },
"gitignore_3": { "gitignore_3": {
"inputs": { "inputs": {
"nixpkgs": ["crate2nix", "pre-commit-hooks", "nixpkgs"] "nixpkgs": [
"crate2nix",
"pre-commit-hooks",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1709087332, "lastModified": 1709087332,
@ -726,10 +770,22 @@
}, },
"pre-commit-hooks_2": { "pre-commit-hooks_2": {
"inputs": { "inputs": {
"flake-compat": ["crate2nix", "crate2nix_stable", "flake-compat"], "flake-compat": [
"crate2nix",
"crate2nix_stable",
"flake-compat"
],
"gitignore": "gitignore_2", "gitignore": "gitignore_2",
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"], "nixpkgs": [
"nixpkgs-stable": ["crate2nix", "crate2nix_stable", "nixpkgs"] "crate2nix",
"crate2nix_stable",
"nixpkgs"
],
"nixpkgs-stable": [
"crate2nix",
"crate2nix_stable",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1719259945, "lastModified": 1719259945,
@ -747,11 +803,20 @@
}, },
"pre-commit-hooks_3": { "pre-commit-hooks_3": {
"inputs": { "inputs": {
"flake-compat": ["crate2nix", "flake-compat"], "flake-compat": [
"crate2nix",
"flake-compat"
],
"flake-utils": "flake-utils_5", "flake-utils": "flake-utils_5",
"gitignore": "gitignore_3", "gitignore": "gitignore_3",
"nixpkgs": ["crate2nix", "nixpkgs"], "nixpkgs": [
"nixpkgs-stable": ["crate2nix", "nixpkgs"] "crate2nix",
"nixpkgs"
],
"nixpkgs-stable": [
"crate2nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1712055707, "lastModified": 1712055707,
@ -772,14 +837,20 @@
"crate2nix": "crate2nix", "crate2nix": "crate2nix",
"flake-utils": "flake-utils_6", "flake-utils": "flake-utils_6",
"nix-filter": "nix-filter", "nix-filter": "nix-filter",
"nixpkgs": ["tgi-nix", "nixpkgs"], "nixpkgs": [
"tgi-nix",
"nixpkgs"
],
"rust-overlay": "rust-overlay", "rust-overlay": "rust-overlay",
"tgi-nix": "tgi-nix" "tgi-nix": "tgi-nix"
} }
}, },
"rust-overlay": { "rust-overlay": {
"inputs": { "inputs": {
"nixpkgs": ["tgi-nix", "nixpkgs"] "nixpkgs": [
"tgi-nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1729045942, "lastModified": 1729045942,
@ -907,15 +978,16 @@
"nixpkgs": "nixpkgs_6" "nixpkgs": "nixpkgs_6"
}, },
"locked": { "locked": {
"lastModified": 1731674227, "lastModified": 1731923801,
"narHash": "sha256-k/ur37KSc+RXcwwz0tgxeamz6wQ5rsOe5hMepzIdD2s=", "narHash": "sha256-SVtXtTGgnKjwPwMLe030l/DVhcm1vH4fXM7tUAPYOZc=",
"owner": "huggingface", "owner": "huggingface",
"repo": "text-generation-inference-nix", "repo": "text-generation-inference-nix",
"rev": "407b9e22a0b7121bf6e171d67ce0144e3f3e39bf", "rev": "b87d4b5bede0ffed7da50e9a5246b133c7d618dc",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "huggingface", "owner": "huggingface",
"ref": "marlin-kernels-0.3.5",
"repo": "text-generation-inference-nix", "repo": "text-generation-inference-nix",
"type": "github" "type": "github"
} }

View File

@ -5,7 +5,7 @@
inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
}; };
nix-filter.url = "github:numtide/nix-filter"; nix-filter.url = "github:numtide/nix-filter";
tgi-nix.url = "github:huggingface/text-generation-inference-nix"; tgi-nix.url = "github:huggingface/text-generation-inference-nix/marlin-kernels-0.3.5";
nixpkgs.follows = "tgi-nix/nixpkgs"; nixpkgs.follows = "tgi-nix/nixpkgs";
flake-utils.url = "github:numtide/flake-utils"; flake-utils.url = "github:numtide/flake-utils";
rust-overlay = { rust-overlay = {

26
server/poetry.lock generated
View File

@ -1288,12 +1288,12 @@ files = [
[[package]] [[package]]
name = "marlin-kernels" name = "marlin-kernels"
version = "0.3.3" version = "0.3.5"
description = "Marlin quantization kernels" description = "Marlin quantization kernels"
optional = true optional = true
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:36da87e2d486083147c16845f193a438602a0dbc7a0ffb908fbec416c05c5951"}, {file = "marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:58d4bf0aa1a9533acc05f1e5bf50f727ed0129848d1fa1feb2c5c3fa482518d4"},
] ]
[package.dependencies] [package.dependencies]
@ -1301,16 +1301,16 @@ torch = "*"
[package.source] [package.source]
type = "url" type = "url"
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl" url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl"
[[package]] [[package]]
name = "marlin-kernels" name = "marlin-kernels"
version = "0.3.3" version = "0.3.5"
description = "Marlin quantization kernels" description = "Marlin quantization kernels"
optional = true optional = true
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:c9ca340acdf27df009bf23ee1f37978f999b1a1378736dc3306df27eb48e364d"}, {file = "marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:a3a3653e6908db013ca96979a5ee1f6a8bb590ee7506a129e06b87d4a8cbb87d"},
] ]
[package.dependencies] [package.dependencies]
@ -1318,16 +1318,16 @@ torch = "*"
[package.source] [package.source]
type = "url" type = "url"
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl" url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl"
[[package]] [[package]]
name = "marlin-kernels" name = "marlin-kernels"
version = "0.3.3" version = "0.3.5"
description = "Marlin quantization kernels" description = "Marlin quantization kernels"
optional = true optional = true
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:21e36b4880fc630882c8265e0cd27b379e40b1b87512f92a321506f4e5397d26"}, {file = "marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:967b4765a591530a4b9160ae32f3f352a89ae4c71daf43220c99976987d76723"},
] ]
[package.dependencies] [package.dependencies]
@ -1335,16 +1335,16 @@ torch = "*"
[package.source] [package.source]
type = "url" type = "url"
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl" url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl"
[[package]] [[package]]
name = "marlin-kernels" name = "marlin-kernels"
version = "0.3.3" version = "0.3.5"
description = "Marlin quantization kernels" description = "Marlin quantization kernels"
optional = true optional = true
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:cdbf46e68313f76e9648ce7255353763cadbe14b7a789e01f5d502b76d64ee35"}, {file = "marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:fbe607d5afd1e1fca6e294c3594a0ec279d1f9ea6a2fdf7f34ccb6180d15e195"},
] ]
[package.dependencies] [package.dependencies]
@ -1352,7 +1352,7 @@ torch = "*"
[package.source] [package.source]
type = "url" type = "url"
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl" url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl"
[[package]] [[package]]
name = "mdurl" name = "mdurl"
@ -4066,4 +4066,4 @@ torch = ["torch"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.9,<3.13" python-versions = ">=3.9,<3.13"
content-hash = "05add88628d836faceae1a26fde4092651a6eca74555ae38ebff879a7895be7e" content-hash = "b889115cee7f1969856f233e74721965f692e40d2a1c2fceccaf6b3bdb19680d"

View File

@ -48,10 +48,10 @@ attention-kernels = [
{ url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
] ]
marlin-kernels = [ marlin-kernels = [
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
] ]
moe-kernels = [ moe-kernels = [
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },