mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 11:54:52 +00:00

Green ?

This commit is contained in:
parent c471e46cf8
commit 165bb4b6c0
.gitignore (vendored): 1 addition
@@ -1,3 +1,4 @@
 .idea
 target
 router/tokenizer.json
+.*__pycache__.*
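Note that .gitignore patterns use glob matching, not regular expressions: in the added line the dots are literal characters and `*` is the only wildcard. A plain `__pycache__/` entry is the more conventional way to ignore Python bytecode caches, but the pattern above works as a glob as written.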
@@ -37,8 +37,8 @@ async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
     generated_texts = [r.generated_text for r in responses]

     assert len(generated_texts) == 4
-    assert generated_texts, all(
+    assert all(
         [text == generated_texts[0] for text in generated_texts]
-    )
+    ), generated_texts

     assert responses == response_snapshot
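The test change fixes a classic Python pitfall: in `assert condition, message`, everything after the comma is only the failure message, so the old `assert generated_texts, all(...)` merely checked that the list was non-empty and never enforced the equality. A minimal standalone sketch of the before/after semantics (hypothetical data, not taken from the test suite):

# assert <condition>, <message>: the operand after the comma is only the
# failure message; it does not participate in the check.
generated_texts = ["same", "same", "diff", "same"]  # hypothetical outputs

# Old form: passes for any non-empty list, because only the truthiness of
# generated_texts is asserted; the all(...) result is an unused message.
assert generated_texts, all(t == generated_texts[0] for t in generated_texts)

# New form: the equality check is the condition, and generated_texts is
# shown in the traceback when it fails.
try:
    assert all(t == generated_texts[0] for t in generated_texts), generated_texts
except AssertionError as err:
    print("caught:", err)  # caught: ['same', 'same', 'diff', 'same']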
@@ -94,9 +94,6 @@ class FlashNeoxAttention(torch.nn.Module):

         rotary_ndims = int(self.head_size * rotary_pct)
         self.rotary_emb = PositionRotaryEmbedding(rotary_ndims, base=rotary_emb_base)
-        self.rotary_emb.inv_freq = nn.Parameter(
-            weights.get_tensor(f"{prefix}.rotary_emb.inv_freq")
-        )
         self.softmax_scale = self.head_size ** (-0.5)

         self.query_key_value = load_qkv(
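The removed lines overwrote the rotary embedding's inverse frequencies with the copy stored in the checkpoint. Rotary position embeddings derive `inv_freq` deterministically from the dimension and base, so the value computed in the constructor should already match; a minimal sketch of the standard formula (assuming the usual GPT-NeoX-style rotary parameterization; the helper name is hypothetical):

import torch

def rotary_inv_freq(rotary_ndims: int, base: float = 10000.0) -> torch.Tensor:
    # One inverse frequency per pair of dimensions, geometrically spaced
    # as base^(-2i / rotary_ndims) -- the standard rotary parameterization.
    exponents = torch.arange(0, rotary_ndims, 2, dtype=torch.float32) / rotary_ndims
    return 1.0 / (base ** exponents)

# With rotary_ndims = int(head_size * rotary_pct) as in the constructor above,
# this reproduces the tensor checkpoints typically store under
# rotary_emb.inv_freq, which is why the explicit override is redundant.
print(rotary_inv_freq(64)[:4])  # tensor([1.0000, 0.7499, 0.5623, 0.4217])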