Nix: add attention/moe/quantization kernels

This commit is contained in:
Daniël de Kok 2025-02-05 10:39:49 +00:00
parent ca1067f9db
commit 4c8ced2826
4 changed files with 11 additions and 6 deletions

View File

@@ -978,16 +978,15 @@
 "nixpkgs": "nixpkgs_6"
 },
 "locked": {
-"lastModified": 1737970302,
-"narHash": "sha256-uoArelKpaixLDozNTrXii2hOWXwJzonPqAgxwZyjzM0=",
+"lastModified": 1738749288,
+"narHash": "sha256-QaiFND/2yP+AMxykLf4+riYS3gl6OLizsGFnnYnUrVs=",
 "owner": "huggingface",
 "repo": "text-generation-inference-nix",
-"rev": "f43f30042a435e22ab0dbdda8a3d62ad05ff0ada",
+"rev": "88d96987f1d81fd46bad84a07d443b82d280ece6",
 "type": "github"
 },
 "original": {
 "owner": "huggingface",
-"ref": "hf-kernels",
 "repo": "text-generation-inference-nix",
 "type": "github"
 }

View File

@@ -5,7 +5,7 @@
 inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
 };
 nix-filter.url = "github:numtide/nix-filter";
-tgi-nix.url = "github:huggingface/text-generation-inference-nix/hf-kernels";
+tgi-nix.url = "github:huggingface/text-generation-inference-nix";
 nixpkgs.follows = "tgi-nix/nixpkgs";
 flake-utils.url = "github:numtide/flake-utils";
 rust-overlay = {

View File

@@ -3,6 +3,7 @@
 buildPythonPackage,
 poetry-core,
 mypy-protobuf,
+attention,
 awq-inference-engine,
 causal-conv1d,
 compressed-tensors,
@@ -22,6 +23,7 @@
 hf-transfer,
 loguru,
 mamba-ssm,
+moe,
 opentelemetry-api,
 opentelemetry-exporter-otlp,
 opentelemetry-instrumentation-grpc,
@@ -33,6 +35,7 @@
 punica-kernels,
 py-cpuinfo,
 pydantic,
+quantization,
 safetensors,
 tokenizers,
 torch,
@@ -76,6 +79,7 @@ buildPythonPackage {
 pythonRemoveDeps = [ "scipy" ];
 dependencies = [
+attention
 awq-inference-engine
 eetq
 causal-conv1d
@@ -94,6 +98,7 @@ buildPythonPackage {
 hf-transfer
 loguru
 mamba-ssm
+moe
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-instrumentation-grpc
@@ -105,6 +110,7 @@ buildPythonPackage {
 punica-kernels
 py-cpuinfo
 pydantic
+quantization
 safetensors
 sentencepiece
 tokenizers

View File

@@ -4241,4 +4241,4 @@
}
]
}
]
]