Manual fixes.

This commit is contained in:
Ubuntu 2023-06-07 12:56:04 +00:00
parent 6ddcd1582c
commit b8bfb2a91e
4 changed files with 6 additions and 6 deletions

View File

@@ -1,6 +1,9 @@
install-server:
cd server && make install
install-custom-kernels:
cd server/custom_kernels && python setup.py install
install-integration-tests:
cd integration-tests && pip install -r requirements.txt
cd clients/python && pip install .
@@ -14,7 +17,7 @@ install-launcher:
install-benchmark:
cd benchmark && cargo install --path .
install: install-server install-router install-launcher
install: install-server install-router install-launcher install-custom-kernels
server-dev:
cd server && make run-dev
@@ -52,4 +55,4 @@ run-bloom:
text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080
run-bloom-quantize:
text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080

View File

@@ -1 +0,0 @@
{"inputs":"Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n<|prompter|>Why is butter a great building material for skyscrapers? Think step by step.</s><|assistant|>","parameters":{"temperature": 0.75, "top_p": 0.95, "repetition_penalty": 1.2, "top_k": 50, "truncate": 1000, "max_new_tokens": 1024}}

View File

@@ -84,8 +84,6 @@ class FlashNeoxAttention(torch.nn.Module):
super().__init__()
num_heads = config.num_attention_heads
hidden_size = config.hidden_size
rotary_pct = config.rotary_pct
rotary_emb_base = config.rotary_emb_base
self.num_heads = num_heads
self.hidden_size = hidden_size

View File

@@ -31,7 +31,7 @@ class Weights:
filename = self.routing.get(tensor_name, None)
if filename is None:
raise RuntimeError(f"weight {tensor_name} does not exist")
return filename
return str(filename)
def _get_slice(self, tensor_name: str):
filename = self.get_filename(tensor_name)