This commit is contained in:
Ubuntu 2023-05-25 08:45:41 +00:00 committed by Nicolas Patry
parent c471e46cf8
commit 165bb4b6c0
3 changed files with 3 additions and 5 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
.idea
target
router/tokenizer.json
.*__pycache__.*

View File

@@ -37,8 +37,8 @@ async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
 generated_texts = [r.generated_text for r in responses]
 assert len(generated_texts) == 4
-assert generated_texts, all(
+assert all(
 [text == generated_texts[0] for text in generated_texts]
-)
+), generated_texts
 assert responses == response_snapshot

View File

@@ -94,9 +94,6 @@ class FlashNeoxAttention(torch.nn.Module):
 rotary_ndims = int(self.head_size * rotary_pct)
 self.rotary_emb = PositionRotaryEmbedding(rotary_ndims, base=rotary_emb_base)
-self.rotary_emb.inv_freq = nn.Parameter(
-weights.get_tensor(f"{prefix}.rotary_emb.inv_freq")
-)
 self.softmax_scale = self.head_size ** (-0.5)
 self.query_key_value = load_qkv(