Update FP8 KV cache test to use checkpoint with scales

This commit is contained in:
Daniël de Kok 2024-10-21 11:18:52 +00:00
parent ba4ac96399
commit 1f18cb6aa6
4 changed files with 151 additions and 107 deletions

View File

@ -11,27 +11,27 @@
},
{
"id": 3923,
"logprob": -5.6328125,
"logprob": -6.1875,
"text": "What"
},
{
"id": 374,
"logprob": -1.2265625,
"logprob": -0.93359375,
"text": " is"
},
{
"id": 5655,
"logprob": -9.1015625,
"logprob": -9.875,
"text": " deep"
},
{
"id": 6975,
"logprob": -1.8085938,
"logprob": -1.1796875,
"text": " learning"
},
{
"id": 30,
"logprob": -1.0439453,
"logprob": -1.75,
"text": "?"
}
],
@ -39,66 +39,66 @@
"tokens": [
{
"id": 18682,
"logprob": -2.1992188,
"logprob": -1.109375,
"special": false,
"text": " Deep"
},
{
"id": 6975,
"logprob": -0.079956055,
"logprob": -0.005432129,
"special": false,
"text": " learning"
},
{
"id": 374,
"logprob": -0.2763672,
"logprob": -0.028808594,
"special": false,
"text": " is"
},
{
"id": 264,
"logprob": -0.37548828,
"logprob": -0.013671875,
"special": false,
"text": " a"
},
{
"id": 27084,
"logprob": -1.4628906,
"logprob": -0.69921875,
"special": false,
"text": " subset"
},
{
"id": 315,
"logprob": -0.02885437,
"logprob": -0.0005874634,
"special": false,
"text": " of"
},
{
"id": 5780,
"logprob": -0.2565918,
"logprob": -0.026855469,
"special": false,
"text": " machine"
},
{
"id": 6975,
"logprob": -0.0063438416,
"logprob": -0.00020885468,
"special": false,
"text": " learning"
},
{
"id": 430,
"logprob": -1.3056641,
"logprob": -0.17773438,
"special": false,
"text": " that"
},
{
"id": 374,
"logprob": -1.6035156,
"id": 18065,
"logprob": -0.703125,
"special": false,
"text": " is"
"text": " involves"
}
],
"top_tokens": null
},
"generated_text": " Deep learning is a subset of machine learning that is"
"generated_text": " Deep learning is a subset of machine learning that involves"
}

View File

@ -1,8 +1,8 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 3,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 128000,
@ -11,22 +11,22 @@
},
{
"id": 374,
"logprob": -22.96875,
"logprob": -18.0,
"text": " is"
},
{
"id": 5655,
"logprob": -10.71875,
"logprob": -11.75,
"text": " deep"
},
{
"id": 6975,
"logprob": -2.6992188,
"logprob": -2.0625,
"text": " learning"
},
{
"id": 30,
"logprob": -4.8398438,
"logprob": -6.0,
"text": "?"
}
],
@ -34,24 +34,66 @@
"tokens": [
{
"id": 720,
"logprob": -0.4411621,
"logprob": 0.0,
"special": false,
"text": " \n"
},
{
"id": 220,
"logprob": -0.35864258,
"id": 34564,
"logprob": -0.11279297,
"special": false,
"text": " "
"text": "Deep"
},
{
"id": 128001,
"id": 6975,
"logprob": -0.16015625,
"special": false,
"text": " learning"
},
{
"id": 320,
"logprob": -0.25195312,
"special": false,
"text": " ("
},
{
"id": 16931,
"logprob": -1.703125,
"special": false,
"text": "DL"
},
{
"id": 8,
"logprob": 0.0,
"special": true,
"text": "<|end_of_text|>"
"special": false,
"text": ")"
},
{
"id": 374,
"logprob": -1.140625,
"special": false,
"text": " is"
},
{
"id": 264,
"logprob": 0.0,
"special": false,
"text": " a"
},
{
"id": 1207,
"logprob": -1.3125,
"special": false,
"text": " sub"
},
{
"id": 2630,
"logprob": 0.0,
"special": false,
"text": "field"
}
],
"top_tokens": null
},
"generated_text": "What is deep learning? \n "
"generated_text": "What is deep learning? \nDeep learning (DL) is a subfield"
}

View File

@ -12,27 +12,27 @@
},
{
"id": 3923,
"logprob": -5.6328125,
"logprob": -6.1875,
"text": "What"
},
{
"id": 374,
"logprob": -1.2265625,
"logprob": -0.93359375,
"text": " is"
},
{
"id": 5655,
"logprob": -9.1015625,
"logprob": -9.875,
"text": " deep"
},
{
"id": 6975,
"logprob": -1.8085938,
"logprob": -1.1796875,
"text": " learning"
},
{
"id": 30,
"logprob": -1.0439453,
"logprob": -1.75,
"text": "?"
}
],
@ -40,68 +40,68 @@
"tokens": [
{
"id": 18682,
"logprob": -2.1992188,
"logprob": -1.109375,
"special": false,
"text": " Deep"
},
{
"id": 6975,
"logprob": -0.07897949,
"logprob": -0.0047912598,
"special": false,
"text": " learning"
},
{
"id": 374,
"logprob": -0.27734375,
"logprob": -0.025512695,
"special": false,
"text": " is"
},
{
"id": 264,
"logprob": -0.37402344,
"logprob": -0.012145996,
"special": false,
"text": " a"
},
{
"id": 27084,
"logprob": -1.4511719,
"logprob": -0.72265625,
"special": false,
"text": " subset"
},
{
"id": 315,
"logprob": -0.02909851,
"logprob": -0.0005760193,
"special": false,
"text": " of"
},
{
"id": 5780,
"logprob": -0.25854492,
"logprob": -0.02722168,
"special": false,
"text": " machine"
},
{
"id": 6975,
"logprob": -0.0061798096,
"logprob": -0.00023651123,
"special": false,
"text": " learning"
},
{
"id": 430,
"logprob": -1.3046875,
"logprob": -0.17285156,
"special": false,
"text": " that"
},
{
"id": 374,
"logprob": -1.5537109,
"id": 18065,
"logprob": -0.703125,
"special": false,
"text": " is"
"text": " involves"
}
],
"top_tokens": null
},
"generated_text": " Deep learning is a subset of machine learning that is"
"generated_text": " Deep learning is a subset of machine learning that involves"
},
{
"details": {
@ -116,27 +116,27 @@
},
{
"id": 3923,
"logprob": -5.6328125,
"logprob": -6.21875,
"text": "What"
},
{
"id": 374,
"logprob": -1.2265625,
"logprob": -0.95703125,
"text": " is"
},
{
"id": 5655,
"logprob": -9.1015625,
"logprob": -9.9375,
"text": " deep"
},
{
"id": 6975,
"logprob": -1.8085938,
"logprob": -1.1328125,
"text": " learning"
},
{
"id": 30,
"logprob": -1.0439453,
"logprob": -1.75,
"text": "?"
}
],
@ -144,68 +144,68 @@
"tokens": [
{
"id": 18682,
"logprob": -2.1992188,
"logprob": -1.1796875,
"special": false,
"text": " Deep"
},
{
"id": 6975,
"logprob": -0.07897949,
"logprob": -0.005432129,
"special": false,
"text": " learning"
},
{
"id": 374,
"logprob": -0.27734375,
"logprob": -0.02758789,
"special": false,
"text": " is"
},
{
"id": 264,
"logprob": -0.37402344,
"logprob": -0.013366699,
"special": false,
"text": " a"
},
{
"id": 27084,
"logprob": -1.4511719,
"logprob": -0.6953125,
"special": false,
"text": " subset"
},
{
"id": 315,
"logprob": -0.02909851,
"logprob": -0.0004863739,
"special": false,
"text": " of"
},
{
"id": 5780,
"logprob": -0.25854492,
"logprob": -0.02709961,
"special": false,
"text": " machine"
},
{
"id": 6975,
"logprob": -0.0061798096,
"logprob": -0.00022506714,
"special": false,
"text": " learning"
},
{
"id": 430,
"logprob": -1.3046875,
"logprob": -0.19726562,
"special": false,
"text": " that"
},
{
"id": 374,
"logprob": -1.5537109,
"id": 18065,
"logprob": -0.77734375,
"special": false,
"text": " is"
"text": " involves"
}
],
"top_tokens": null
},
"generated_text": " Deep learning is a subset of machine learning that is"
"generated_text": " Deep learning is a subset of machine learning that involves"
},
{
"details": {
@ -220,27 +220,27 @@
},
{
"id": 3923,
"logprob": -5.6328125,
"logprob": -6.21875,
"text": "What"
},
{
"id": 374,
"logprob": -1.2265625,
"logprob": -0.95703125,
"text": " is"
},
{
"id": 5655,
"logprob": -9.1015625,
"logprob": -9.9375,
"text": " deep"
},
{
"id": 6975,
"logprob": -1.8085938,
"logprob": -1.1328125,
"text": " learning"
},
{
"id": 30,
"logprob": -1.0439453,
"logprob": -1.75,
"text": "?"
}
],
@ -248,68 +248,68 @@
"tokens": [
{
"id": 18682,
"logprob": -2.1992188,
"logprob": -1.1796875,
"special": false,
"text": " Deep"
},
{
"id": 6975,
"logprob": -0.07897949,
"logprob": -0.005432129,
"special": false,
"text": " learning"
},
{
"id": 374,
"logprob": -0.27734375,
"logprob": -0.02758789,
"special": false,
"text": " is"
},
{
"id": 264,
"logprob": -0.37402344,
"logprob": -0.013366699,
"special": false,
"text": " a"
},
{
"id": 27084,
"logprob": -1.4511719,
"logprob": -0.6953125,
"special": false,
"text": " subset"
},
{
"id": 315,
"logprob": -0.02909851,
"logprob": -0.0004863739,
"special": false,
"text": " of"
},
{
"id": 5780,
"logprob": -0.25854492,
"logprob": -0.02709961,
"special": false,
"text": " machine"
},
{
"id": 6975,
"logprob": -0.0061798096,
"logprob": -0.00022506714,
"special": false,
"text": " learning"
},
{
"id": 430,
"logprob": -1.3046875,
"logprob": -0.19726562,
"special": false,
"text": " that"
},
{
"id": 374,
"logprob": -1.5537109,
"id": 18065,
"logprob": -0.77734375,
"special": false,
"text": " is"
"text": " involves"
}
],
"top_tokens": null
},
"generated_text": " Deep learning is a subset of machine learning that is"
"generated_text": " Deep learning is a subset of machine learning that involves"
},
{
"details": {
@ -324,27 +324,27 @@
},
{
"id": 3923,
"logprob": -5.6328125,
"logprob": -6.21875,
"text": "What"
},
{
"id": 374,
"logprob": -1.2265625,
"logprob": -0.95703125,
"text": " is"
},
{
"id": 5655,
"logprob": -9.1015625,
"logprob": -9.9375,
"text": " deep"
},
{
"id": 6975,
"logprob": -1.8085938,
"logprob": -1.1328125,
"text": " learning"
},
{
"id": 30,
"logprob": -1.0439453,
"logprob": -1.75,
"text": "?"
}
],
@ -352,67 +352,67 @@
"tokens": [
{
"id": 18682,
"logprob": -2.1992188,
"logprob": -1.1796875,
"special": false,
"text": " Deep"
},
{
"id": 6975,
"logprob": -0.07897949,
"logprob": -0.005432129,
"special": false,
"text": " learning"
},
{
"id": 374,
"logprob": -0.27734375,
"logprob": -0.02758789,
"special": false,
"text": " is"
},
{
"id": 264,
"logprob": -0.37402344,
"logprob": -0.013366699,
"special": false,
"text": " a"
},
{
"id": 27084,
"logprob": -1.4511719,
"logprob": -0.6953125,
"special": false,
"text": " subset"
},
{
"id": 315,
"logprob": -0.02909851,
"logprob": -0.0004863739,
"special": false,
"text": " of"
},
{
"id": 5780,
"logprob": -0.25854492,
"logprob": -0.02709961,
"special": false,
"text": " machine"
},
{
"id": 6975,
"logprob": -0.0061798096,
"logprob": -0.00022506714,
"special": false,
"text": " learning"
},
{
"id": 430,
"logprob": -1.3046875,
"logprob": -0.19726562,
"special": false,
"text": " that"
},
{
"id": 374,
"logprob": -1.5537109,
"id": 18065,
"logprob": -0.77734375,
"special": false,
"text": " is"
"text": " involves"
}
],
"top_tokens": null
},
"generated_text": " Deep learning is a subset of machine learning that is"
"generated_text": " Deep learning is a subset of machine learning that involves"
}
]

View File

@ -4,7 +4,9 @@ import pytest
@pytest.fixture(scope="module")
def flash_llama_fp8_kv_cache_handle(launcher):
with launcher(
"meta-llama/Meta-Llama-3-8B", num_shard=2, kv_cache_dtype="fp8_e5m2"
"neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
num_shard=2,
kv_cache_dtype="fp8_e4m3fn",
) as handle:
yield handle
@ -25,7 +27,7 @@ async def test_flash_llama_fp8_kv_cache(flash_llama_fp8_kv_cache, response_snaps
assert (
response.generated_text
== " Deep learning is a subset of machine learning that is"
== " Deep learning is a subset of machine learning that involves"
)
assert response.details.generated_tokens == 10
assert response == response_snapshot
@ -69,7 +71,7 @@ async def test_flash_llama_fp8_kv_cache_load(
assert len(responses) == 4
assert (
responses[0].generated_text
== " Deep learning is a subset of machine learning that is"
== " Deep learning is a subset of machine learning that involves"
)
assert all(
[r.generated_text == responses[0].generated_text for r in responses]