From b8bfb2a91e4a37c48435eea1423787d900563ef2 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 7 Jun 2023 12:56:04 +0000 Subject: [PATCH] Manual fixes. --- Makefile | 7 +++++-- server/text_generation_server/input.json | 1 - .../models/custom_modeling/flash_neox_modeling.py | 2 -- server/text_generation_server/utils/weights.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) delete mode 100644 server/text_generation_server/input.json diff --git a/Makefile b/Makefile index a33aba17..77de731c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,9 @@ install-server: cd server && make install +install-custom-kernels: + cd server/custom_kernels && python setup.py install + install-integration-tests: cd integration-tests && pip install -r requirements.txt cd clients/python && pip install . @@ -14,7 +17,7 @@ install-launcher: install-benchmark: cd benchmark && cargo install --path . -install: install-server install-router install-launcher +install: install-server install-router install-launcher install-custom-kernels server-dev: cd server && make run-dev @@ -52,4 +55,4 @@ run-bloom: text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080 run-bloom-quantize: - text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080 \ No newline at end of file + text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080 diff --git a/server/text_generation_server/input.json b/server/text_generation_server/input.json deleted file mode 100644 index 274a4d9b..00000000 --- a/server/text_generation_server/input.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs":"Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n<|prompter|>Why is butter a great building material for skyscrapers? Think step by step.<|assistant|>","parameters":{"temperature": 0.75, "top_p": 0.95, "repetition_penalty": 1.2, "top_k": 50, "truncate": 1000, "max_new_tokens": 1024}} diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py index b28aa68a..16570ebc 100644 --- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py @@ -84,8 +84,6 @@ class FlashNeoxAttention(torch.nn.Module): super().__init__() num_heads = config.num_attention_heads hidden_size = config.hidden_size - rotary_pct = config.rotary_pct - rotary_emb_base = config.rotary_emb_base self.num_heads = num_heads self.hidden_size = hidden_size diff --git a/server/text_generation_server/utils/weights.py b/server/text_generation_server/utils/weights.py index 2a410ca3..76a4f65a 100644 --- a/server/text_generation_server/utils/weights.py +++ b/server/text_generation_server/utils/weights.py @@ -31,7 +31,7 @@ class Weights: filename = self.routing.get(tensor_name, None) if filename is None: raise RuntimeError(f"weight {tensor_name} does not exist") - return filename + return str(filename) def _get_slice(self, tensor_name: str): filename = self.get_filename(tensor_name)