Nix: add attention/moe/quantization kernels

This commit is contained in:
Daniël de Kok 2025-02-05 10:39:49 +00:00
parent ca1067f9db
commit 4c8ced2826
4 changed files with 11 additions and 6 deletions

View File

@@ -978,16 +978,15 @@
"nixpkgs": "nixpkgs_6" "nixpkgs": "nixpkgs_6"
}, },
"locked": { "locked": {
"lastModified": 1737970302, "lastModified": 1738749288,
"narHash": "sha256-uoArelKpaixLDozNTrXii2hOWXwJzonPqAgxwZyjzM0=", "narHash": "sha256-QaiFND/2yP+AMxykLf4+riYS3gl6OLizsGFnnYnUrVs=",
"owner": "huggingface", "owner": "huggingface",
"repo": "text-generation-inference-nix", "repo": "text-generation-inference-nix",
"rev": "f43f30042a435e22ab0dbdda8a3d62ad05ff0ada", "rev": "88d96987f1d81fd46bad84a07d443b82d280ece6",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "huggingface", "owner": "huggingface",
"ref": "hf-kernels",
"repo": "text-generation-inference-nix", "repo": "text-generation-inference-nix",
"type": "github" "type": "github"
} }

View File

@@ -5,7 +5,7 @@
inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
}; };
nix-filter.url = "github:numtide/nix-filter"; nix-filter.url = "github:numtide/nix-filter";
tgi-nix.url = "github:huggingface/text-generation-inference-nix/hf-kernels"; tgi-nix.url = "github:huggingface/text-generation-inference-nix";
nixpkgs.follows = "tgi-nix/nixpkgs"; nixpkgs.follows = "tgi-nix/nixpkgs";
flake-utils.url = "github:numtide/flake-utils"; flake-utils.url = "github:numtide/flake-utils";
rust-overlay = { rust-overlay = {

View File

@@ -3,6 +3,7 @@
buildPythonPackage, buildPythonPackage,
poetry-core, poetry-core,
mypy-protobuf, mypy-protobuf,
attention,
awq-inference-engine, awq-inference-engine,
causal-conv1d, causal-conv1d,
compressed-tensors, compressed-tensors,
@@ -22,6 +23,7 @@
hf-transfer, hf-transfer,
loguru, loguru,
mamba-ssm, mamba-ssm,
moe,
opentelemetry-api, opentelemetry-api,
opentelemetry-exporter-otlp, opentelemetry-exporter-otlp,
opentelemetry-instrumentation-grpc, opentelemetry-instrumentation-grpc,
@@ -33,6 +35,7 @@
punica-kernels, punica-kernels,
py-cpuinfo, py-cpuinfo,
pydantic, pydantic,
quantization,
safetensors, safetensors,
tokenizers, tokenizers,
torch, torch,
@@ -76,6 +79,7 @@ buildPythonPackage {
pythonRemoveDeps = [ "scipy" ]; pythonRemoveDeps = [ "scipy" ];
dependencies = [ dependencies = [
attention
awq-inference-engine awq-inference-engine
eetq eetq
causal-conv1d causal-conv1d
@@ -94,6 +98,7 @@ buildPythonPackage {
hf-transfer hf-transfer
loguru loguru
mamba-ssm mamba-ssm
moe
opentelemetry-api opentelemetry-api
opentelemetry-exporter-otlp opentelemetry-exporter-otlp
opentelemetry-instrumentation-grpc opentelemetry-instrumentation-grpc
@@ -105,6 +110,7 @@ buildPythonPackage {
punica-kernels punica-kernels
py-cpuinfo py-cpuinfo
pydantic pydantic
quantization
safetensors safetensors
sentencepiece sentencepiece
tokenizers tokenizers