diff --git a/flake.lock b/flake.lock index 8c5b3ef3..f2aafda9 100644 --- a/flake.lock +++ b/flake.lock @@ -978,16 +978,15 @@ "nixpkgs": "nixpkgs_6" }, "locked": { - "lastModified": 1737970302, - "narHash": "sha256-uoArelKpaixLDozNTrXii2hOWXwJzonPqAgxwZyjzM0=", + "lastModified": 1738749288, + "narHash": "sha256-QaiFND/2yP+AMxykLf4+riYS3gl6OLizsGFnnYnUrVs=", "owner": "huggingface", "repo": "text-generation-inference-nix", - "rev": "f43f30042a435e22ab0dbdda8a3d62ad05ff0ada", + "rev": "88d96987f1d81fd46bad84a07d443b82d280ece6", "type": "github" }, "original": { "owner": "huggingface", - "ref": "hf-kernels", "repo": "text-generation-inference-nix", "type": "github" } diff --git a/flake.nix b/flake.nix index 9c221645..83cedfa6 100644 --- a/flake.nix +++ b/flake.nix @@ -5,7 +5,7 @@ inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; }; nix-filter.url = "github:numtide/nix-filter"; - tgi-nix.url = "github:huggingface/text-generation-inference-nix/hf-kernels"; + tgi-nix.url = "github:huggingface/text-generation-inference-nix"; nixpkgs.follows = "tgi-nix/nixpkgs"; flake-utils.url = "github:numtide/flake-utils"; rust-overlay = { diff --git a/nix/server.nix b/nix/server.nix index e7af940e..a7c8c799 100644 --- a/nix/server.nix +++ b/nix/server.nix @@ -3,6 +3,7 @@ buildPythonPackage, poetry-core, mypy-protobuf, + attention, awq-inference-engine, causal-conv1d, compressed-tensors, @@ -22,6 +23,7 @@ hf-transfer, loguru, mamba-ssm, + moe, opentelemetry-api, opentelemetry-exporter-otlp, opentelemetry-instrumentation-grpc, @@ -33,6 +35,7 @@ punica-kernels, py-cpuinfo, pydantic, + quantization, safetensors, tokenizers, torch, @@ -76,6 +79,7 @@ buildPythonPackage { pythonRemoveDeps = [ "scipy" ]; dependencies = [ + attention awq-inference-engine eetq causal-conv1d @@ -94,6 +98,7 @@ buildPythonPackage { hf-transfer loguru mamba-ssm + moe opentelemetry-api opentelemetry-exporter-otlp opentelemetry-instrumentation-grpc @@ -105,6 +110,7 @@ buildPythonPackage { punica-kernels py-cpuinfo pydantic + quantization safetensors sentencepiece tokenizers diff --git a/server/hf-kernels.lock b/server/hf-kernels.lock index f44b40c0..9b9e9beb 100644 --- a/server/hf-kernels.lock +++ b/server/hf-kernels.lock @@ -4241,4 +4241,4 @@ } ] } -] \ No newline at end of file +]