Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-10 03:44:54 +00:00
Manual fixes.

This commit is contained in:
parent 6ddcd1582c
commit b8bfb2a91e

Makefile (7 changed lines)
Makefile
@@ -1,6 +1,9 @@
 install-server:
 	cd server && make install
 
+install-custom-kernels:
+	cd server/custom_kernels && python setup.py install
+
 install-integration-tests:
 	cd integration-tests && pip install -r requirements.txt
 	cd clients/python && pip install .
@@ -14,7 +17,7 @@ install-launcher:
 install-benchmark:
 	cd benchmark && cargo install --path .
 
-install: install-server install-router install-launcher
+install: install-server install-router install-launcher install-custom-kernels
 
 server-dev:
 	cd server && make run-dev
@@ -52,4 +55,4 @@ run-bloom:
 	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080
 
 run-bloom-quantize:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
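The new install-custom-kernels target simply runs python setup.py install inside server/custom_kernels. As a rough orientation only, here is a minimal sketch of what such a build script typically looks like when the kernels are packaged as a PyTorch C++/CUDA extension; the extension name and source paths below are assumptions for illustration, not the repository's actual setup.py.

    # Hypothetical sketch of a custom-kernels setup.py using torch's CUDA
    # extension support; module name and source files are illustrative only.
    from setuptools import setup
    from torch.utils.cpp_extension import BuildExtension, CUDAExtension

    setup(
        name="custom_kernels",
        ext_modules=[
            CUDAExtension(
                name="custom_kernels.fused_attention",          # assumed extension name
                sources=["custom_kernels/fused_attention.cu"],  # assumed source file
            )
        ],
        cmdclass={"build_ext": BuildExtension},
    )

With a file like this in place, make install-custom-kernels just delegates to that setup.py from the custom_kernels directory, and the top-level install target now depends on it.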
@@ -1 +0,0 @@
-{"inputs":"Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n<|prompter|>Why is butter a great building material for skyscrapers? Think step by step.</s><|assistant|>","parameters":{"temperature": 0.75, "top_p": 0.95, "repetition_penalty": 1.2, "top_k": 50, "truncate": 1000, "max_new_tokens": 1024}}
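The removed file held a sample request body for the server's generate endpoint. For context, a minimal sketch of sending such a payload to a locally running instance; the host and port are assumptions (the port matches the --port 8080 used in the Makefile targets above).

    # Sketch: POST a request body like the one removed above to a running
    # text-generation-inference server on the assumed host/port.
    import requests

    payload = {
        "inputs": "<|prompter|>Why is butter a great building material for skyscrapers? Think step by step.</s><|assistant|>",
        "parameters": {
            "temperature": 0.75,
            "top_p": 0.95,
            "repetition_penalty": 1.2,
            "top_k": 50,
            "truncate": 1000,
            "max_new_tokens": 1024,
        },
    }

    response = requests.post("http://127.0.0.1:8080/generate", json=payload, timeout=60)
    response.raise_for_status()
    print(response.json()["generated_text"])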
@@ -84,8 +84,6 @@ class FlashNeoxAttention(torch.nn.Module):
         super().__init__()
         num_heads = config.num_attention_heads
         hidden_size = config.hidden_size
-        rotary_pct = config.rotary_pct
-        rotary_emb_base = config.rotary_emb_base
 
         self.num_heads = num_heads
         self.hidden_size = hidden_size
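The two deleted locals, rotary_pct and rotary_emb_base, are the GPT-NeoX settings that control how much of each attention head receives rotary position encoding and the frequency base of that encoding. As a sketch of how such values are typically consumed (the helper and the example head size below are illustrative, not this module's actual code):

    # Sketch of a GPT-NeoX-style rotary setup driven by rotary_pct and
    # rotary_emb_base; this helper is illustrative, not the file's real code.
    import torch

    def rotary_inv_freq(head_size: int, rotary_pct: float, rotary_emb_base: float) -> torch.Tensor:
        # Only a fraction of each head's dimensions gets rotary position encoding.
        rotary_ndims = int(head_size * rotary_pct)
        # Standard inverse-frequency schedule for rotary embeddings.
        return 1.0 / (
            rotary_emb_base
            ** (torch.arange(0, rotary_ndims, 2, dtype=torch.float32) / rotary_ndims)
        )

    # Example with typical GPT-NeoX-style values (assumed, not from the config above).
    inv_freq = rotary_inv_freq(head_size=96, rotary_pct=0.25, rotary_emb_base=10000.0)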
@@ -31,7 +31,7 @@ class Weights:
         filename = self.routing.get(tensor_name, None)
         if filename is None:
             raise RuntimeError(f"weight {tensor_name} does not exist")
-        return filename
+        return str(filename)
 
     def _get_slice(self, tensor_name: str):
         filename = self.get_filename(tensor_name)
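This change makes get_filename return str(filename) rather than the raw routing value, presumably so callers always receive a plain string even when the routing map stores Path objects. A simplified stand-in showing the pattern (not the library's actual Weights class):

    # Simplified stand-in for the lookup pattern changed above; not the
    # library's actual Weights implementation.
    from pathlib import Path
    from typing import Dict


    class WeightsSketch:
        def __init__(self, routing: Dict[str, Path]):
            # Maps a tensor name to the weights file that contains it.
            self.routing = routing

        def get_filename(self, tensor_name: str) -> str:
            filename = self.routing.get(tensor_name, None)
            if filename is None:
                raise RuntimeError(f"weight {tensor_name} does not exist")
            # Normalize to str so callers never have to handle Path objects.
            return str(filename)


    # Illustrative usage with an assumed tensor name and shard filename.
    weights = WeightsSketch({"model.embed_tokens.weight": Path("model-00001-of-00002.safetensors")})
    print(weights.get_filename("model.embed_tokens.weight"))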