Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-10 03:44:54 +00:00
Manual fixes.

This commit is contained in:
parent 6ddcd1582c
commit b8bfb2a91e

Makefile (7 changed lines)
Makefile
@@ -1,6 +1,9 @@
 install-server:
 	cd server && make install
 
+install-custom-kernels:
+	cd server/custom_kernels && python setup.py install
+
 install-integration-tests:
 	cd integration-tests && pip install -r requirements.txt
 	cd clients/python && pip install .
@@ -14,7 +17,7 @@ install-launcher:
 install-benchmark:
 	cd benchmark && cargo install --path .
 
-install: install-server install-router install-launcher
+install: install-server install-router install-launcher install-custom-kernels
 
 server-dev:
 	cd server && make run-dev
@@ -52,4 +55,4 @@ run-bloom:
 	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080
 
 run-bloom-quantize:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
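The new install-custom-kernels target simply runs python setup.py install inside server/custom_kernels. As a rough orientation only, here is a minimal sketch of what such a build script typically looks like when the kernels are packaged as a PyTorch C++/CUDA extension; the extension name and source paths below are assumptions for illustration, not the repository's actual setup.py.

    # Hypothetical sketch of a custom-kernels setup.py using torch's CUDA
    # extension support; module name and source files are illustrative only.
    from setuptools import setup
    from torch.utils.cpp_extension import BuildExtension, CUDAExtension

    setup(
        name="custom_kernels",
        ext_modules=[
            CUDAExtension(
                name="custom_kernels.fused_attention",          # assumed extension name
                sources=["custom_kernels/fused_attention.cu"],  # assumed source file
            )
        ],
        cmdclass={"build_ext": BuildExtension},
    )

With a file like this in place, make install-custom-kernels just delegates to that setup.py from the custom_kernels directory, and the top-level install target now depends on it.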
@@ -1 +0,0 @@
-{"inputs":"Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n<|prompter|>Why is butter a great building material for skyscrapers? Think step by step.</s><|assistant|>","parameters":{"temperature": 0.75, "top_p": 0.95, "repetition_penalty": 1.2, "top_k": 50, "truncate": 1000, "max_new_tokens": 1024}}
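The removed file held a sample request body for the server's generate endpoint. For context, a minimal sketch of sending such a payload to a locally running instance; the host and port are assumptions (the port matches the --port 8080 used in the Makefile targets above).

    # Sketch: POST a request body like the one removed above to a running
    # text-generation-inference server on the assumed host/port.
    import requests

    payload = {
        "inputs": "<|prompter|>Why is butter a great building material for skyscrapers? Think step by step.</s><|assistant|>",
        "parameters": {
            "temperature": 0.75,
            "top_p": 0.95,
            "repetition_penalty": 1.2,
            "top_k": 50,
            "truncate": 1000,
            "max_new_tokens": 1024,
        },
    }

    response = requests.post("http://127.0.0.1:8080/generate", json=payload, timeout=60)
    response.raise_for_status()
    print(response.json()["generated_text"])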
@@ -84,8 +84,6 @@ class FlashNeoxAttention(torch.nn.Module):
         super().__init__()
         num_heads = config.num_attention_heads
         hidden_size = config.hidden_size
-        rotary_pct = config.rotary_pct
-        rotary_emb_base = config.rotary_emb_base
 
         self.num_heads = num_heads
         self.hidden_size = hidden_size
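The two deleted locals, rotary_pct and rotary_emb_base, are the GPT-NeoX settings that control how much of each attention head receives rotary position encoding and the frequency base of that encoding. As a sketch of how such values are typically consumed (the helper and the example head size below are illustrative, not this module's actual code):

    # Sketch of a GPT-NeoX-style rotary setup driven by rotary_pct and
    # rotary_emb_base; this helper is illustrative, not the file's real code.
    import torch

    def rotary_inv_freq(head_size: int, rotary_pct: float, rotary_emb_base: float) -> torch.Tensor:
        # Only a fraction of each head's dimensions gets rotary position encoding.
        rotary_ndims = int(head_size * rotary_pct)
        # Standard inverse-frequency schedule for rotary embeddings.
        return 1.0 / (
            rotary_emb_base
            ** (torch.arange(0, rotary_ndims, 2, dtype=torch.float32) / rotary_ndims)
        )

    # Example with typical GPT-NeoX-style values (assumed, not from the config above).
    inv_freq = rotary_inv_freq(head_size=96, rotary_pct=0.25, rotary_emb_base=10000.0)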
@@ -31,7 +31,7 @@ class Weights:
         filename = self.routing.get(tensor_name, None)
         if filename is None:
             raise RuntimeError(f"weight {tensor_name} does not exist")
-        return filename
+        return str(filename)
 
     def _get_slice(self, tensor_name: str):
         filename = self.get_filename(tensor_name)
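This change makes get_filename return str(filename) rather than the raw routing value, presumably so callers always receive a plain string even when the routing map stores Path objects. A simplified stand-in showing the pattern (not the library's actual Weights class):

    # Simplified stand-in for the lookup pattern changed above; not the
    # library's actual Weights implementation.
    from pathlib import Path
    from typing import Dict


    class WeightsSketch:
        def __init__(self, routing: Dict[str, Path]):
            # Maps a tensor name to the weights file that contains it.
            self.routing = routing

        def get_filename(self, tensor_name: str) -> str:
            filename = self.routing.get(tensor_name, None)
            if filename is None:
                raise RuntimeError(f"weight {tensor_name} does not exist")
            # Normalize to str so callers never have to handle Path objects.
            return str(filename)


    # Illustrative usage with an assumed tensor name and shard filename.
    weights = WeightsSketch({"model.embed_tokens.weight": Path("model-00001-of-00002.safetensors")})
    print(weights.get_filename("model.embed_tokens.weight"))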