From b8bfb2a91e4a37c48435eea1423787d900563ef2 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal>
Date: Wed, 7 Jun 2023 12:56:04 +0000
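Subject: [PATCH] Manual fixes: custom kernels target, weights path, cleanup

- Makefile: add an install-custom-kernels target, make it a prerequisite of
  install, and add the missing newline at end of file.
- Delete server/text_generation_server/input.json.
- flash_neox_modeling.py: remove the rotary_pct and rotary_emb_base locals
  from FlashNeoxAttention.
- weights.py: return the routed weight filename as a str.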
---
 Makefile                                                   | 7 +++++--
 server/text_generation_server/input.json                   | 1 -
 .../models/custom_modeling/flash_neox_modeling.py          | 2 --
 server/text_generation_server/utils/weights.py             | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)
 delete mode 100644 server/text_generation_server/input.json
diff --git a/Makefile b/Makefile
index a33aba17..77de731c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,9 @@
 install-server:
 	cd server && make install
 
+install-custom-kernels:
+	cd server/custom_kernels && python setup.py install
+
 install-integration-tests:
 	cd integration-tests && pip install -r requirements.txt
 	cd clients/python && pip install .
@@ -14,7 +17,7 @@ install-launcher:
 install-benchmark:
 	cd benchmark && cargo install --path .
 
-install: install-server install-router install-launcher
+install: install-server install-router install-launcher install-custom-kernels
 
 server-dev:
 	cd server && make run-dev
@@ -52,4 +55,4 @@ run-bloom:
 	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080
 
 run-bloom-quantize:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
\ No newline at end of file
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
diff --git a/server/text_generation_server/input.json b/server/text_generation_server/input.json
deleted file mode 100644
index 274a4d9b..00000000
--- a/server/text_generation_server/input.json
+++ /dev/null
@@ -1 +0,0 @@
-{"inputs":"Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n<|prompter|>Why is butter a great building material for skyscrapers? Think step by step.</s><|assistant|>","parameters":{"temperature": 0.75, "top_p": 0.95, "repetition_penalty": 1.2, "top_k": 50, "truncate": 1000, "max_new_tokens": 1024}}
diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
index b28aa68a..16570ebc 100644
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -84,8 +84,6 @@ class FlashNeoxAttention(torch.nn.Module):
         super().__init__()
         num_heads = config.num_attention_heads
         hidden_size = config.hidden_size
-        rotary_pct = config.rotary_pct
-        rotary_emb_base = config.rotary_emb_base
 
         self.num_heads = num_heads
         self.hidden_size = hidden_size
diff --git a/server/text_generation_server/utils/weights.py b/server/text_generation_server/utils/weights.py
index 2a410ca3..76a4f65a 100644
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -31,7 +31,7 @@ class Weights:
         filename = self.routing.get(tensor_name, None)
         if filename is None:
             raise RuntimeError(f"weight {tensor_name} does not exist")
-        return filename
+        return str(filename)
 
     def _get_slice(self, tensor_name: str):
         filename = self.get_filename(tensor_name)