diff --git a/flake.nix b/flake.nix
index 299e6b3d..5a687dd6 100644
--- a/flake.nix
+++ b/flake.nix
@@ -50,11 +50,34 @@
         inherit (poetry2nix.lib.mkPoetry2Nix { inherit pkgs; }) mkPoetryEditablePackage;
         text-generation-server = mkPoetryEditablePackage { editablePackageSources = ./server; };
         crateOverrides = import ./nix/crate-overrides.nix { inherit pkgs nix-filter; };
+        # Launcher and router crates from the Cargo workspace, built with the
+        # crate overrides defined above.
+        launcher = cargoNix.workspaceMembers.text-generation-launcher.build.override {
+          inherit crateOverrides;
+        };
+        router = cargoNix.workspaceMembers.text-generation-router-v3.build.override {
+          inherit crateOverrides;
+        };
+        # Python server package, defined in ./nix/server.nix.
+        server = pkgs.python3.pkgs.callPackage ./nix/server.nix { inherit nix-filter; };
       in
       {
-        devShells.default =
-          with pkgs;
-          mkShell {
+        devShells = with pkgs; rec {
+          # The default dev shell is the pure one.
+          default = pure;
+
+          # Pure shell: contains the Nix-built launcher, router and server.
+          pure = mkShell {
+            buildInputs = [
+              launcher
+              router
+              server
+            ];
+          };
+
+          # Impure shell: toolchain plus a Python virtualenv (venvShellHook);
+          # launcher, router and server are not included as built packages.
+          impure = mkShell {
             buildInputs =
               [
                 openssl.dev
@@ -65,43 +88,16 @@
                     "rust-src"
                   ];
                 })
+                protobuf
               ]
               ++ (with python3.pkgs; [
                 venvShellHook
                 pip
-
-                causal-conv1d
-                click
-                einops
-                exllamav2
-                fbgemm-gpu
-                flashinfer
-                flash-attn
-                flash-attn-layer-norm
-                flash-attn-rotary
-                grpc-interceptor
-                grpcio-reflection
-                grpcio-status
-                grpcio-tools
-                hf-transfer
-                ipdb
-                loguru
-                mamba-ssm
-                marlin-kernels
-                opentelemetry-api
-                opentelemetry-exporter-otlp
-                opentelemetry-instrumentation-grpc
-                opentelemetry-semantic-conventions
-                peft
-                tokenizers
-                torch
-                transformers
-                vllm
-
-                (cargoNix.workspaceMembers.text-generation-launcher.build.override { inherit crateOverrides; })
-                (cargoNix.workspaceMembers.text-generation-router-v3.build.override { inherit crateOverrides; })
               ]);
 
+            # Reuse the server package's build inputs instead of listing them here.
+            inputsFrom = [ server ];
+
             venvDir = "./.venv";
 
             postVenv = ''
@@ -109,8 +105,10 @@
             '';
             postShellHook = ''
               unset SOURCE_DATE_EPOCH
+              export PATH=$PATH:~/.cargo/bin
             '';
           };
+        };
       }
     );
 }
diff --git a/nix/server.nix b/nix/server.nix
new file mode 100644
index 00000000..ff40757a
--- /dev/null
+++ b/nix/server.nix
@@ -0,0 +1,117 @@
+# Nix derivation for the Python text-generation-server package.
+#
+# Arguments are nix-filter plus the Python build tooling and runtime
+# dependencies; flake.nix supplies them via python3.pkgs.callPackage.
+{
+  nix-filter,
+  buildPythonPackage,
+  poetry-core,
+  mypy-protobuf,
+  causal-conv1d,
+  einops,
+  exllamav2,
+  fbgemm-gpu,
+  flashinfer,
+  flash-attn,
+  flash-attn-layer-norm,
+  flash-attn-rotary,
+  grpc-interceptor,
+  grpcio-reflection,
+  grpcio-status,
+  grpcio-tools,
+  hf-transfer,
+  loguru,
+  mamba-ssm,
+  marlin-kernels,
+  opentelemetry-api,
+  opentelemetry-exporter-otlp,
+  opentelemetry-instrumentation-grpc,
+  opentelemetry-semantic-conventions,
+  peft,
+  safetensors,
+  tokenizers,
+  sentencepiece,
+  transformers,
+  typer,
+  vllm,
+}:
+
+let
+  filter = nix-filter.lib;
+in
+buildPythonPackage {
+  name = "text-generation-server";
+
+  # Restrict the source tree to the server's Python sources (.py/.pyi), its
+  # pyproject.toml and the v3 protobuf definitions.
+  src = filter {
+    root = ../.;
+    include = with filter; [
+      isDirectory
+      (and (inDirectory "server") (or_ (matchExt "py") (matchExt "pyi")))
+      "server/pyproject.toml"
+      (and (inDirectory "proto/v3") (matchExt "proto"))
+    ];
+  };
+
+  pyproject = true;
+
+  build-system = [ poetry-core ];
+
+  # Provides the protoc plugin behind `--mypy_out` in prePatch.
+  nativeBuildInputs = [ mypy-protobuf ];
+
+  # Relax the version constraints pyproject.toml places on these packages.
+  pythonRelaxDeps = [
+    "einops"
+    "huggingface-hub"
+    "loguru"
+    "opentelemetry-instrumentation-grpc"
+    "sentencepiece"
+    "typer"
+  ];
+
+  # Drop the scipy requirement entirely.
+  pythonRemoveDeps = [ "scipy" ];
+
+  dependencies = [
+    causal-conv1d
+    einops
+    exllamav2
+    fbgemm-gpu
+    flashinfer
+    flash-attn
+    flash-attn-layer-norm
+    flash-attn-rotary
+    grpc-interceptor
+    grpcio-reflection
+    grpcio-status
+    grpcio-tools
+    hf-transfer
+    loguru
+    mamba-ssm
+    marlin-kernels
+    opentelemetry-api
+    opentelemetry-exporter-otlp
+    opentelemetry-instrumentation-grpc
+    opentelemetry-semantic-conventions
+    peft
+    safetensors
+    sentencepiece
+    tokenizers
+    transformers
+    typer
+    vllm
+  ];
+
+  # Generate the protobuf/gRPC Python modules from proto/v3/generate.proto into
+  # text_generation_server/pb, turn their `import ..._pb2` lines into
+  # package-relative imports, then continue the build from server/.
+  prePatch = ''
+    python -m grpc_tools.protoc -Iproto/v3 --python_out=server/text_generation_server/pb \
+      --grpc_python_out=server/text_generation_server/pb --mypy_out=server/text_generation_server/pb proto/v3/generate.proto
+    find server/text_generation_server/pb/ -type f -name "*.py" -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
+    touch server/text_generation_server/pb/__init__.py
+    cd server
+  '';
+}