mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
nix: pure server and support both pure and impure devShells
This commit is contained in:
parent
b70ae0969f
commit
f015bb382c
58
flake.nix
58
flake.nix
@ -50,11 +50,27 @@
|
||||
inherit (poetry2nix.lib.mkPoetry2Nix { inherit pkgs; }) mkPoetryEditablePackage;
|
||||
text-generation-server = mkPoetryEditablePackage { editablePackageSources = ./server; };
|
||||
crateOverrides = import ./nix/crate-overrides.nix { inherit pkgs nix-filter; };
|
||||
launcher = cargoNix.workspaceMembers.text-generation-launcher.build.override {
|
||||
inherit crateOverrides;
|
||||
};
|
||||
router = cargoNix.workspaceMembers.text-generation-router-v3.build.override {
|
||||
inherit crateOverrides;
|
||||
};
|
||||
server = pkgs.python3.pkgs.callPackage ./nix/server.nix { inherit nix-filter; };
|
||||
in
|
||||
{
|
||||
devShells.default =
|
||||
with pkgs;
|
||||
mkShell {
|
||||
devShells = with pkgs; rec {
|
||||
default = pure;
|
||||
|
||||
pure = mkShell {
|
||||
buildInputs = [
|
||||
launcher
|
||||
router
|
||||
server
|
||||
];
|
||||
};
|
||||
|
||||
impure = mkShell {
|
||||
buildInputs =
|
||||
[
|
||||
openssl.dev
|
||||
@ -65,43 +81,15 @@
|
||||
"rust-src"
|
||||
];
|
||||
})
|
||||
protobuf
|
||||
]
|
||||
++ (with python3.pkgs; [
|
||||
venvShellHook
|
||||
pip
|
||||
|
||||
causal-conv1d
|
||||
click
|
||||
einops
|
||||
exllamav2
|
||||
fbgemm-gpu
|
||||
flashinfer
|
||||
flash-attn
|
||||
flash-attn-layer-norm
|
||||
flash-attn-rotary
|
||||
grpc-interceptor
|
||||
grpcio-reflection
|
||||
grpcio-status
|
||||
grpcio-tools
|
||||
hf-transfer
|
||||
ipdb
|
||||
loguru
|
||||
mamba-ssm
|
||||
marlin-kernels
|
||||
opentelemetry-api
|
||||
opentelemetry-exporter-otlp
|
||||
opentelemetry-instrumentation-grpc
|
||||
opentelemetry-semantic-conventions
|
||||
peft
|
||||
tokenizers
|
||||
torch
|
||||
transformers
|
||||
vllm
|
||||
|
||||
(cargoNix.workspaceMembers.text-generation-launcher.build.override { inherit crateOverrides; })
|
||||
(cargoNix.workspaceMembers.text-generation-router-v3.build.override { inherit crateOverrides; })
|
||||
]);
|
||||
|
||||
inputsFrom = [ server ];
|
||||
|
||||
venvDir = "./.venv";
|
||||
|
||||
postVenv = ''
|
||||
@ -109,8 +97,10 @@
|
||||
'';
|
||||
postShellHook = ''
|
||||
unset SOURCE_DATE_EPOCH
|
||||
export PATH=$PATH:~/.cargo/bin
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
||||
);
|
||||
}
|
||||
|
105
nix/server.nix
Normal file
105
nix/server.nix
Normal file
@ -0,0 +1,105 @@
|
||||
# Nix function that builds the `text-generation-server` Python package.
# Arguments: `nix-filter` (used to restrict the source tree),
# `buildPythonPackage`, the `poetry-core` build backend, `mypy-protobuf`,
# and the Python packages listed under `dependencies` below.
# NOTE(review): flake.nix appears to call this through
# `pkgs.python3.pkgs.callPackage` with only `nix-filter` passed explicitly,
# so the remaining arguments are presumably filled from the Python package
# set — confirm against the flake.
{
|
||||
nix-filter,
|
||||
buildPythonPackage,
|
||||
poetry-core,
|
||||
mypy-protobuf,
|
||||
causal-conv1d,
|
||||
einops,
|
||||
exllamav2,
|
||||
fbgemm-gpu,
|
||||
flashinfer,
|
||||
flash-attn,
|
||||
flash-attn-layer-norm,
|
||||
flash-attn-rotary,
|
||||
grpc-interceptor,
|
||||
grpcio-reflection,
|
||||
grpcio-status,
|
||||
grpcio-tools,
|
||||
hf-transfer,
|
||||
loguru,
|
||||
mamba-ssm,
|
||||
marlin-kernels,
|
||||
opentelemetry-api,
|
||||
opentelemetry-exporter-otlp,
|
||||
opentelemetry-instrumentation-grpc,
|
||||
opentelemetry-semantic-conventions,
|
||||
peft,
|
||||
safetensors,
|
||||
tokenizers,
|
||||
sentencepiece,
|
||||
transformers,
|
||||
typer,
|
||||
vllm,
|
||||
|
||||
}:
|
||||
|
||||
let
|
||||
# Short alias for the nix-filter library; it is both called as a function
|
||||
# (for `src`) and opened with `with` to reach its matcher helpers.
filter = nix-filter.lib;
|
||||
in
|
||||
buildPythonPackage {
|
||||
name = "text-generation-server";
|
||||
|
||||
# Source tree: the parent directory of nix/, filtered down to the Python
# sources/stubs under server/, server/pyproject.toml, and the .proto files
# under proto/v3 (the inputs that prePatch and the build read).
src = filter {
|
||||
root = ../.;
|
||||
include = with filter; [
|
||||
# NOTE(review): presumably kept so the filter can descend into
# subdirectories — confirm against the nix-filter documentation.
isDirectory
|
||||
(and (inDirectory "server") (or_ (matchExt "py") (matchExt "pyi")))
|
||||
"server/pyproject.toml"
|
||||
(and (inDirectory "proto/v3") (matchExt "proto"))
|
||||
];
|
||||
};
|
||||
|
||||
# Build from pyproject.toml, with poetry-core as the build backend.
pyproject = true;
|
||||
|
||||
build-system = [ poetry-core ];
|
||||
|
||||
# NOTE(review): presumably supplies the protoc plugin behind the
# `--mypy_out` flag used in prePatch — confirm.
nativeBuildInputs = [ mypy-protobuf ];
|
||||
|
||||
# Dependencies whose version constraints from pyproject.toml are relaxed.
# NOTE(review): presumably because the packaged versions differ from the
# pinned ones — confirm.
pythonRelaxDeps = [
|
||||
"einops"
|
||||
"huggingface-hub"
|
||||
"loguru"
|
||||
"opentelemetry-instrumentation-grpc"
|
||||
"sentencepiece"
|
||||
"typer"
|
||||
];
|
||||
|
||||
# scipy is removed from the declared requirements; it is not among the
# dependencies listed below.
pythonRemoveDeps = [ "scipy" ];
|
||||
|
||||
# Python dependencies of the server, taken from the function arguments.
dependencies = [
|
||||
causal-conv1d
|
||||
einops
|
||||
exllamav2
|
||||
fbgemm-gpu
|
||||
flashinfer
|
||||
flash-attn
|
||||
flash-attn-layer-norm
|
||||
flash-attn-rotary
|
||||
grpc-interceptor
|
||||
grpcio-reflection
|
||||
grpcio-status
|
||||
grpcio-tools
|
||||
hf-transfer
|
||||
loguru
|
||||
mamba-ssm
|
||||
marlin-kernels
|
||||
opentelemetry-api
|
||||
opentelemetry-exporter-otlp
|
||||
opentelemetry-instrumentation-grpc
|
||||
opentelemetry-semantic-conventions
|
||||
peft
|
||||
safetensors
|
||||
sentencepiece
|
||||
tokenizers
|
||||
transformers
|
||||
typer
|
||||
vllm
|
||||
];
|
||||
|
||||
# prePatch steps, in order:
#   1. run grpc_tools.protoc on proto/v3/generate.proto, writing the Python,
#      gRPC and mypy outputs into server/text_generation_server/pb;
#   2. rewrite lines starting with `import ...pb2` in the generated *.py
#      files to `from . import ...pb2` (package-relative imports);
#   3. create pb/__init__.py;
#   4. cd into server/, the directory holding the included pyproject.toml,
#      so the commands that follow run from there.
prePatch = ''
|
||||
python -m grpc_tools.protoc -Iproto/v3 --python_out=server/text_generation_server/pb \
|
||||
--grpc_python_out=server/text_generation_server/pb --mypy_out=server/text_generation_server/pb proto/v3/generate.proto
|
||||
find server/text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
|
||||
touch server/text_generation_server/pb/__init__.py
|
||||
cd server
|
||||
'';
|
||||
}
|
Loading…
Reference in New Issue
Block a user