mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 03:44:54 +00:00
Green ?
This commit is contained in:
parent
c471e46cf8
commit
165bb4b6c0
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
.idea
|
||||
target
|
||||
router/tokenizer.json
|
||||
.*__pycache__.*
|
||||
|
@ -37,8 +37,8 @@ async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
|
||||
generated_texts = [r.generated_text for r in responses]
|
||||
|
||||
assert len(generated_texts) == 4
|
||||
assert generated_texts, all(
|
||||
assert all(
|
||||
[text == generated_texts[0] for text in generated_texts]
|
||||
)
|
||||
), generated_texts
|
||||
|
||||
assert responses == response_snapshot
|
||||
|
@ -94,9 +94,6 @@ class FlashNeoxAttention(torch.nn.Module):
|
||||
|
||||
rotary_ndims = int(self.head_size * rotary_pct)
|
||||
self.rotary_emb = PositionRotaryEmbedding(rotary_ndims, base=rotary_emb_base)
|
||||
self.rotary_emb.inv_freq = nn.Parameter(
|
||||
weights.get_tensor(f"{prefix}.rotary_emb.inv_freq")
|
||||
)
|
||||
self.softmax_scale = self.head_size ** (-0.5)
|
||||
|
||||
self.query_key_value = load_qkv(
|
||||
|
Loading…
Reference in New Issue
Block a user