mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Use marlin-kernels 0.3.5
This commit is contained in:
parent
f76c0ff17f
commit
e0018723fc
122
flake.lock
122
flake.lock
@ -2,10 +2,16 @@
|
|||||||
"nodes": {
|
"nodes": {
|
||||||
"cachix": {
|
"cachix": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"devenv": ["crate2nix"],
|
"devenv": [
|
||||||
"flake-compat": ["crate2nix"],
|
"crate2nix"
|
||||||
|
],
|
||||||
|
"flake-compat": [
|
||||||
|
"crate2nix"
|
||||||
|
],
|
||||||
"nixpkgs": "nixpkgs",
|
"nixpkgs": "nixpkgs",
|
||||||
"pre-commit-hooks": ["crate2nix"]
|
"pre-commit-hooks": [
|
||||||
|
"crate2nix"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1709700175,
|
"lastModified": 1709700175,
|
||||||
@ -24,10 +30,19 @@
|
|||||||
},
|
},
|
||||||
"cachix_2": {
|
"cachix_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"devenv": ["crate2nix", "crate2nix_stable"],
|
"devenv": [
|
||||||
"flake-compat": ["crate2nix", "crate2nix_stable"],
|
"crate2nix",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
"nixpkgs": "nixpkgs_2",
|
"nixpkgs": "nixpkgs_2",
|
||||||
"pre-commit-hooks": ["crate2nix", "crate2nix_stable"]
|
"pre-commit-hooks": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1716549461,
|
"lastModified": 1716549461,
|
||||||
@ -46,8 +61,16 @@
|
|||||||
},
|
},
|
||||||
"cachix_3": {
|
"cachix_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"devenv": ["crate2nix", "crate2nix_stable", "crate2nix_stable"],
|
"devenv": [
|
||||||
"flake-compat": ["crate2nix", "crate2nix_stable", "crate2nix_stable"],
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
"nixpkgs": "nixpkgs_3",
|
"nixpkgs": "nixpkgs_3",
|
||||||
"pre-commit-hooks": [
|
"pre-commit-hooks": [
|
||||||
"crate2nix",
|
"crate2nix",
|
||||||
@ -78,7 +101,10 @@
|
|||||||
"flake-compat": "flake-compat_3",
|
"flake-compat": "flake-compat_3",
|
||||||
"flake-parts": "flake-parts_3",
|
"flake-parts": "flake-parts_3",
|
||||||
"nix-test-runner": "nix-test-runner_3",
|
"nix-test-runner": "nix-test-runner_3",
|
||||||
"nixpkgs": ["tgi-nix", "nixpkgs"],
|
"nixpkgs": [
|
||||||
|
"tgi-nix",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
"pre-commit-hooks": "pre-commit-hooks_3"
|
"pre-commit-hooks": "pre-commit-hooks_3"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
@ -193,7 +219,11 @@
|
|||||||
"devshell_2": {
|
"devshell_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-utils": "flake-utils_3",
|
"flake-utils": "flake-utils_3",
|
||||||
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1717408969,
|
"lastModified": 1717408969,
|
||||||
@ -212,7 +242,10 @@
|
|||||||
"devshell_3": {
|
"devshell_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-utils": "flake-utils_4",
|
"flake-utils": "flake-utils_4",
|
||||||
"nixpkgs": ["crate2nix", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1711099426,
|
"lastModified": 1711099426,
|
||||||
@ -310,7 +343,11 @@
|
|||||||
},
|
},
|
||||||
"flake-parts_2": {
|
"flake-parts_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs-lib": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
"nixpkgs-lib": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1719745305,
|
"lastModified": 1719745305,
|
||||||
@ -328,7 +365,10 @@
|
|||||||
},
|
},
|
||||||
"flake-parts_3": {
|
"flake-parts_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs-lib": ["crate2nix", "nixpkgs"]
|
"nixpkgs-lib": [
|
||||||
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1712014858,
|
"lastModified": 1712014858,
|
||||||
@ -519,7 +559,11 @@
|
|||||||
},
|
},
|
||||||
"gitignore_3": {
|
"gitignore_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs": ["crate2nix", "pre-commit-hooks", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"crate2nix",
|
||||||
|
"pre-commit-hooks",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1709087332,
|
"lastModified": 1709087332,
|
||||||
@ -726,10 +770,22 @@
|
|||||||
},
|
},
|
||||||
"pre-commit-hooks_2": {
|
"pre-commit-hooks_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-compat": ["crate2nix", "crate2nix_stable", "flake-compat"],
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"flake-compat"
|
||||||
|
],
|
||||||
"gitignore": "gitignore_2",
|
"gitignore": "gitignore_2",
|
||||||
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"],
|
"nixpkgs": [
|
||||||
"nixpkgs-stable": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
|
"nixpkgs-stable": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1719259945,
|
"lastModified": 1719259945,
|
||||||
@ -747,11 +803,20 @@
|
|||||||
},
|
},
|
||||||
"pre-commit-hooks_3": {
|
"pre-commit-hooks_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-compat": ["crate2nix", "flake-compat"],
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"flake-compat"
|
||||||
|
],
|
||||||
"flake-utils": "flake-utils_5",
|
"flake-utils": "flake-utils_5",
|
||||||
"gitignore": "gitignore_3",
|
"gitignore": "gitignore_3",
|
||||||
"nixpkgs": ["crate2nix", "nixpkgs"],
|
"nixpkgs": [
|
||||||
"nixpkgs-stable": ["crate2nix", "nixpkgs"]
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
|
"nixpkgs-stable": [
|
||||||
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1712055707,
|
"lastModified": 1712055707,
|
||||||
@ -772,14 +837,20 @@
|
|||||||
"crate2nix": "crate2nix",
|
"crate2nix": "crate2nix",
|
||||||
"flake-utils": "flake-utils_6",
|
"flake-utils": "flake-utils_6",
|
||||||
"nix-filter": "nix-filter",
|
"nix-filter": "nix-filter",
|
||||||
"nixpkgs": ["tgi-nix", "nixpkgs"],
|
"nixpkgs": [
|
||||||
|
"tgi-nix",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
"rust-overlay": "rust-overlay",
|
"rust-overlay": "rust-overlay",
|
||||||
"tgi-nix": "tgi-nix"
|
"tgi-nix": "tgi-nix"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"rust-overlay": {
|
"rust-overlay": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs": ["tgi-nix", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"tgi-nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1729045942,
|
"lastModified": 1729045942,
|
||||||
@ -907,15 +978,16 @@
|
|||||||
"nixpkgs": "nixpkgs_6"
|
"nixpkgs": "nixpkgs_6"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1731674227,
|
"lastModified": 1731923801,
|
||||||
"narHash": "sha256-k/ur37KSc+RXcwwz0tgxeamz6wQ5rsOe5hMepzIdD2s=",
|
"narHash": "sha256-SVtXtTGgnKjwPwMLe030l/DVhcm1vH4fXM7tUAPYOZc=",
|
||||||
"owner": "huggingface",
|
"owner": "huggingface",
|
||||||
"repo": "text-generation-inference-nix",
|
"repo": "text-generation-inference-nix",
|
||||||
"rev": "407b9e22a0b7121bf6e171d67ce0144e3f3e39bf",
|
"rev": "b87d4b5bede0ffed7da50e9a5246b133c7d618dc",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "huggingface",
|
"owner": "huggingface",
|
||||||
|
"ref": "marlin-kernels-0.3.5",
|
||||||
"repo": "text-generation-inference-nix",
|
"repo": "text-generation-inference-nix",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
|
inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
|
||||||
};
|
};
|
||||||
nix-filter.url = "github:numtide/nix-filter";
|
nix-filter.url = "github:numtide/nix-filter";
|
||||||
tgi-nix.url = "github:huggingface/text-generation-inference-nix";
|
tgi-nix.url = "github:huggingface/text-generation-inference-nix/marlin-kernels-0.3.5";
|
||||||
nixpkgs.follows = "tgi-nix/nixpkgs";
|
nixpkgs.follows = "tgi-nix/nixpkgs";
|
||||||
flake-utils.url = "github:numtide/flake-utils";
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
rust-overlay = {
|
rust-overlay = {
|
||||||
|
26
server/poetry.lock
generated
26
server/poetry.lock
generated
@ -1288,12 +1288,12 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "marlin-kernels"
|
name = "marlin-kernels"
|
||||||
version = "0.3.3"
|
version = "0.3.5"
|
||||||
description = "Marlin quantization kernels"
|
description = "Marlin quantization kernels"
|
||||||
optional = true
|
optional = true
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:36da87e2d486083147c16845f193a438602a0dbc7a0ffb908fbec416c05c5951"},
|
{file = "marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:58d4bf0aa1a9533acc05f1e5bf50f727ed0129848d1fa1feb2c5c3fa482518d4"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -1301,16 +1301,16 @@ torch = "*"
|
|||||||
|
|
||||||
[package.source]
|
[package.source]
|
||||||
type = "url"
|
type = "url"
|
||||||
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl"
|
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "marlin-kernels"
|
name = "marlin-kernels"
|
||||||
version = "0.3.3"
|
version = "0.3.5"
|
||||||
description = "Marlin quantization kernels"
|
description = "Marlin quantization kernels"
|
||||||
optional = true
|
optional = true
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:c9ca340acdf27df009bf23ee1f37978f999b1a1378736dc3306df27eb48e364d"},
|
{file = "marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:a3a3653e6908db013ca96979a5ee1f6a8bb590ee7506a129e06b87d4a8cbb87d"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -1318,16 +1318,16 @@ torch = "*"
|
|||||||
|
|
||||||
[package.source]
|
[package.source]
|
||||||
type = "url"
|
type = "url"
|
||||||
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl"
|
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "marlin-kernels"
|
name = "marlin-kernels"
|
||||||
version = "0.3.3"
|
version = "0.3.5"
|
||||||
description = "Marlin quantization kernels"
|
description = "Marlin quantization kernels"
|
||||||
optional = true
|
optional = true
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:21e36b4880fc630882c8265e0cd27b379e40b1b87512f92a321506f4e5397d26"},
|
{file = "marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:967b4765a591530a4b9160ae32f3f352a89ae4c71daf43220c99976987d76723"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -1335,16 +1335,16 @@ torch = "*"
|
|||||||
|
|
||||||
[package.source]
|
[package.source]
|
||||||
type = "url"
|
type = "url"
|
||||||
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl"
|
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "marlin-kernels"
|
name = "marlin-kernels"
|
||||||
version = "0.3.3"
|
version = "0.3.5"
|
||||||
description = "Marlin quantization kernels"
|
description = "Marlin quantization kernels"
|
||||||
optional = true
|
optional = true
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:cdbf46e68313f76e9648ce7255353763cadbe14b7a789e01f5d502b76d64ee35"},
|
{file = "marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:fbe607d5afd1e1fca6e294c3594a0ec279d1f9ea6a2fdf7f34ccb6180d15e195"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -1352,7 +1352,7 @@ torch = "*"
|
|||||||
|
|
||||||
[package.source]
|
[package.source]
|
||||||
type = "url"
|
type = "url"
|
||||||
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl"
|
url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mdurl"
|
name = "mdurl"
|
||||||
@ -4066,4 +4066,4 @@ torch = ["torch"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.9,<3.13"
|
python-versions = ">=3.9,<3.13"
|
||||||
content-hash = "05add88628d836faceae1a26fde4092651a6eca74555ae38ebff879a7895be7e"
|
content-hash = "b889115cee7f1969856f233e74721965f692e40d2a1c2fceccaf6b3bdb19680d"
|
||||||
|
@ -48,10 +48,10 @@ attention-kernels = [
|
|||||||
{ url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
|
{ url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
|
||||||
]
|
]
|
||||||
marlin-kernels = [
|
marlin-kernels = [
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
|
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
|
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
|
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.3/marlin_kernels-0.3.3+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
|
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
|
||||||
]
|
]
|
||||||
moe-kernels = [
|
moe-kernels = [
|
||||||
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
|
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
|
||||||
|
Loading…
Reference in New Issue
Block a user