From 76fef7b1d260c1f04be552a1d30eb2b5816377d6 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Tue, 4 Jun 2024 13:31:43 +0000
Subject: [PATCH] Don't install flash on the CPU tests.

---
 .github/workflows/tests.yaml | 2 +-
 Makefile                     | 6 ++++++
 server/Makefile              | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 37dc8305..74479cc6 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -68,7 +68,7 @@ jobs:
             ~/.cargo/git
       - name: Install
         run: |
-          make install
+          make install-cpu
       - name: Run server tests
         run: |
           pip install pytest

diff --git a/Makefile b/Makefile
index b22987cb..a949be37 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,9 @@
 install-server:
 	cd server && make install
 
+install-server-cpu:
+	cd server && make install-server
+
 install-router:
 	cd router && cargo install --path .
 
@@ -12,6 +15,9 @@ install-benchmark:
 
 install: install-server install-router install-launcher
 
+
+install-cpu: install-server-cpu install-router install-launcher
+
 server-dev:
 	cd server && make run-dev
 
diff --git a/server/Makefile b/server/Makefile
index 089917ce..05e3ac52 100644
--- a/server/Makefile
+++ b/server/Makefile
@@ -23,7 +23,7 @@ install-server: gen-server
 	pip install -e ".[bnb, accelerate, quantize, peft, outlines]"
 
-install: install-server install-cuda
+install: install-cuda
 	echo "Installed server"
 
 install-cuda: install-server install-flash-attention-v2-cuda install-vllm-cuda install-flash-attention
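
For context, a sketch of the target graph after this patch, reconstructed from the hunks above (not an exact copy of either Makefile; recipe bodies and unrelated targets are elided):

    # Top-level Makefile
    install:     install-server install-router install-launcher      # GPU path, unchanged
    install-cpu: install-server-cpu install-router install-launcher  # new CPU path

    install-server-cpu:
    	cd server && make install-server   # plain pip install, no CUDA kernel builds

    # server/Makefile
    install: install-cuda          # install-cuda itself depends on install-server
    install-cuda: install-server install-flash-attention-v2-cuda install-vllm-cuda install-flash-attention

With this layout, the CI CPU job runs `make install-cpu` and reaches only the plain `install-server` target in server/Makefile, so flash-attention and vllm are never built; `make install` keeps its previous behavior for GPU environments via `install-cuda`.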