Update FP8 KV cache test to use checkpoint with scales

This commit is contained in:
Daniël de Kok 2024-10-21 11:18:52 +00:00
parent ba4ac96399
commit 1f18cb6aa6
4 changed files with 151 additions and 107 deletions

View File

@ -11,27 +11,27 @@
}, },
{ {
"id": 3923, "id": 3923,
"logprob": -5.6328125, "logprob": -6.1875,
"text": "What" "text": "What"
}, },
{ {
"id": 374, "id": 374,
"logprob": -1.2265625, "logprob": -0.93359375,
"text": " is" "text": " is"
}, },
{ {
"id": 5655, "id": 5655,
"logprob": -9.1015625, "logprob": -9.875,
"text": " deep" "text": " deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -1.8085938, "logprob": -1.1796875,
"text": " learning" "text": " learning"
}, },
{ {
"id": 30, "id": 30,
"logprob": -1.0439453, "logprob": -1.75,
"text": "?" "text": "?"
} }
], ],
@ -39,66 +39,66 @@
"tokens": [ "tokens": [
{ {
"id": 18682, "id": 18682,
"logprob": -2.1992188, "logprob": -1.109375,
"special": false, "special": false,
"text": " Deep" "text": " Deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.079956055, "logprob": -0.005432129,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 374, "id": 374,
"logprob": -0.2763672, "logprob": -0.028808594,
"special": false, "special": false,
"text": " is" "text": " is"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.37548828, "logprob": -0.013671875,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 27084, "id": 27084,
"logprob": -1.4628906, "logprob": -0.69921875,
"special": false, "special": false,
"text": " subset" "text": " subset"
}, },
{ {
"id": 315, "id": 315,
"logprob": -0.02885437, "logprob": -0.0005874634,
"special": false, "special": false,
"text": " of" "text": " of"
}, },
{ {
"id": 5780, "id": 5780,
"logprob": -0.2565918, "logprob": -0.026855469,
"special": false, "special": false,
"text": " machine" "text": " machine"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.0063438416, "logprob": -0.00020885468,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 430, "id": 430,
"logprob": -1.3056641, "logprob": -0.17773438,
"special": false, "special": false,
"text": " that" "text": " that"
}, },
{ {
"id": 374, "id": 18065,
"logprob": -1.6035156, "logprob": -0.703125,
"special": false, "special": false,
"text": " is" "text": " involves"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": " Deep learning is a subset of machine learning that is" "generated_text": " Deep learning is a subset of machine learning that involves"
} }

View File

@ -1,8 +1,8 @@
{ {
"details": { "details": {
"best_of_sequences": null, "best_of_sequences": null,
"finish_reason": "eos_token", "finish_reason": "length",
"generated_tokens": 3, "generated_tokens": 10,
"prefill": [ "prefill": [
{ {
"id": 128000, "id": 128000,
@ -11,22 +11,22 @@
}, },
{ {
"id": 374, "id": 374,
"logprob": -22.96875, "logprob": -18.0,
"text": " is" "text": " is"
}, },
{ {
"id": 5655, "id": 5655,
"logprob": -10.71875, "logprob": -11.75,
"text": " deep" "text": " deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -2.6992188, "logprob": -2.0625,
"text": " learning" "text": " learning"
}, },
{ {
"id": 30, "id": 30,
"logprob": -4.8398438, "logprob": -6.0,
"text": "?" "text": "?"
} }
], ],
@ -34,24 +34,66 @@
"tokens": [ "tokens": [
{ {
"id": 720, "id": 720,
"logprob": -0.4411621, "logprob": 0.0,
"special": false, "special": false,
"text": " \n" "text": " \n"
}, },
{ {
"id": 220, "id": 34564,
"logprob": -0.35864258, "logprob": -0.11279297,
"special": false, "special": false,
"text": " " "text": "Deep"
}, },
{ {
"id": 128001, "id": 6975,
"logprob": -0.16015625,
"special": false,
"text": " learning"
},
{
"id": 320,
"logprob": -0.25195312,
"special": false,
"text": " ("
},
{
"id": 16931,
"logprob": -1.703125,
"special": false,
"text": "DL"
},
{
"id": 8,
"logprob": 0.0, "logprob": 0.0,
"special": true, "special": false,
"text": "<|end_of_text|>" "text": ")"
},
{
"id": 374,
"logprob": -1.140625,
"special": false,
"text": " is"
},
{
"id": 264,
"logprob": 0.0,
"special": false,
"text": " a"
},
{
"id": 1207,
"logprob": -1.3125,
"special": false,
"text": " sub"
},
{
"id": 2630,
"logprob": 0.0,
"special": false,
"text": "field"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "What is deep learning? \n " "generated_text": "What is deep learning? \nDeep learning (DL) is a subfield"
} }

View File

@ -12,27 +12,27 @@
}, },
{ {
"id": 3923, "id": 3923,
"logprob": -5.6328125, "logprob": -6.1875,
"text": "What" "text": "What"
}, },
{ {
"id": 374, "id": 374,
"logprob": -1.2265625, "logprob": -0.93359375,
"text": " is" "text": " is"
}, },
{ {
"id": 5655, "id": 5655,
"logprob": -9.1015625, "logprob": -9.875,
"text": " deep" "text": " deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -1.8085938, "logprob": -1.1796875,
"text": " learning" "text": " learning"
}, },
{ {
"id": 30, "id": 30,
"logprob": -1.0439453, "logprob": -1.75,
"text": "?" "text": "?"
} }
], ],
@ -40,68 +40,68 @@
"tokens": [ "tokens": [
{ {
"id": 18682, "id": 18682,
"logprob": -2.1992188, "logprob": -1.109375,
"special": false, "special": false,
"text": " Deep" "text": " Deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.07897949, "logprob": -0.0047912598,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 374, "id": 374,
"logprob": -0.27734375, "logprob": -0.025512695,
"special": false, "special": false,
"text": " is" "text": " is"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.37402344, "logprob": -0.012145996,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 27084, "id": 27084,
"logprob": -1.4511719, "logprob": -0.72265625,
"special": false, "special": false,
"text": " subset" "text": " subset"
}, },
{ {
"id": 315, "id": 315,
"logprob": -0.02909851, "logprob": -0.0005760193,
"special": false, "special": false,
"text": " of" "text": " of"
}, },
{ {
"id": 5780, "id": 5780,
"logprob": -0.25854492, "logprob": -0.02722168,
"special": false, "special": false,
"text": " machine" "text": " machine"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.0061798096, "logprob": -0.00023651123,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 430, "id": 430,
"logprob": -1.3046875, "logprob": -0.17285156,
"special": false, "special": false,
"text": " that" "text": " that"
}, },
{ {
"id": 374, "id": 18065,
"logprob": -1.5537109, "logprob": -0.703125,
"special": false, "special": false,
"text": " is" "text": " involves"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": " Deep learning is a subset of machine learning that is" "generated_text": " Deep learning is a subset of machine learning that involves"
}, },
{ {
"details": { "details": {
@ -116,27 +116,27 @@
}, },
{ {
"id": 3923, "id": 3923,
"logprob": -5.6328125, "logprob": -6.21875,
"text": "What" "text": "What"
}, },
{ {
"id": 374, "id": 374,
"logprob": -1.2265625, "logprob": -0.95703125,
"text": " is" "text": " is"
}, },
{ {
"id": 5655, "id": 5655,
"logprob": -9.1015625, "logprob": -9.9375,
"text": " deep" "text": " deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -1.8085938, "logprob": -1.1328125,
"text": " learning" "text": " learning"
}, },
{ {
"id": 30, "id": 30,
"logprob": -1.0439453, "logprob": -1.75,
"text": "?" "text": "?"
} }
], ],
@ -144,68 +144,68 @@
"tokens": [ "tokens": [
{ {
"id": 18682, "id": 18682,
"logprob": -2.1992188, "logprob": -1.1796875,
"special": false, "special": false,
"text": " Deep" "text": " Deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.07897949, "logprob": -0.005432129,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 374, "id": 374,
"logprob": -0.27734375, "logprob": -0.02758789,
"special": false, "special": false,
"text": " is" "text": " is"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.37402344, "logprob": -0.013366699,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 27084, "id": 27084,
"logprob": -1.4511719, "logprob": -0.6953125,
"special": false, "special": false,
"text": " subset" "text": " subset"
}, },
{ {
"id": 315, "id": 315,
"logprob": -0.02909851, "logprob": -0.0004863739,
"special": false, "special": false,
"text": " of" "text": " of"
}, },
{ {
"id": 5780, "id": 5780,
"logprob": -0.25854492, "logprob": -0.02709961,
"special": false, "special": false,
"text": " machine" "text": " machine"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.0061798096, "logprob": -0.00022506714,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 430, "id": 430,
"logprob": -1.3046875, "logprob": -0.19726562,
"special": false, "special": false,
"text": " that" "text": " that"
}, },
{ {
"id": 374, "id": 18065,
"logprob": -1.5537109, "logprob": -0.77734375,
"special": false, "special": false,
"text": " is" "text": " involves"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": " Deep learning is a subset of machine learning that is" "generated_text": " Deep learning is a subset of machine learning that involves"
}, },
{ {
"details": { "details": {
@ -220,27 +220,27 @@
}, },
{ {
"id": 3923, "id": 3923,
"logprob": -5.6328125, "logprob": -6.21875,
"text": "What" "text": "What"
}, },
{ {
"id": 374, "id": 374,
"logprob": -1.2265625, "logprob": -0.95703125,
"text": " is" "text": " is"
}, },
{ {
"id": 5655, "id": 5655,
"logprob": -9.1015625, "logprob": -9.9375,
"text": " deep" "text": " deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -1.8085938, "logprob": -1.1328125,
"text": " learning" "text": " learning"
}, },
{ {
"id": 30, "id": 30,
"logprob": -1.0439453, "logprob": -1.75,
"text": "?" "text": "?"
} }
], ],
@ -248,68 +248,68 @@
"tokens": [ "tokens": [
{ {
"id": 18682, "id": 18682,
"logprob": -2.1992188, "logprob": -1.1796875,
"special": false, "special": false,
"text": " Deep" "text": " Deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.07897949, "logprob": -0.005432129,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 374, "id": 374,
"logprob": -0.27734375, "logprob": -0.02758789,
"special": false, "special": false,
"text": " is" "text": " is"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.37402344, "logprob": -0.013366699,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 27084, "id": 27084,
"logprob": -1.4511719, "logprob": -0.6953125,
"special": false, "special": false,
"text": " subset" "text": " subset"
}, },
{ {
"id": 315, "id": 315,
"logprob": -0.02909851, "logprob": -0.0004863739,
"special": false, "special": false,
"text": " of" "text": " of"
}, },
{ {
"id": 5780, "id": 5780,
"logprob": -0.25854492, "logprob": -0.02709961,
"special": false, "special": false,
"text": " machine" "text": " machine"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.0061798096, "logprob": -0.00022506714,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 430, "id": 430,
"logprob": -1.3046875, "logprob": -0.19726562,
"special": false, "special": false,
"text": " that" "text": " that"
}, },
{ {
"id": 374, "id": 18065,
"logprob": -1.5537109, "logprob": -0.77734375,
"special": false, "special": false,
"text": " is" "text": " involves"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": " Deep learning is a subset of machine learning that is" "generated_text": " Deep learning is a subset of machine learning that involves"
}, },
{ {
"details": { "details": {
@ -324,27 +324,27 @@
}, },
{ {
"id": 3923, "id": 3923,
"logprob": -5.6328125, "logprob": -6.21875,
"text": "What" "text": "What"
}, },
{ {
"id": 374, "id": 374,
"logprob": -1.2265625, "logprob": -0.95703125,
"text": " is" "text": " is"
}, },
{ {
"id": 5655, "id": 5655,
"logprob": -9.1015625, "logprob": -9.9375,
"text": " deep" "text": " deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -1.8085938, "logprob": -1.1328125,
"text": " learning" "text": " learning"
}, },
{ {
"id": 30, "id": 30,
"logprob": -1.0439453, "logprob": -1.75,
"text": "?" "text": "?"
} }
], ],
@ -352,67 +352,67 @@
"tokens": [ "tokens": [
{ {
"id": 18682, "id": 18682,
"logprob": -2.1992188, "logprob": -1.1796875,
"special": false, "special": false,
"text": " Deep" "text": " Deep"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.07897949, "logprob": -0.005432129,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 374, "id": 374,
"logprob": -0.27734375, "logprob": -0.02758789,
"special": false, "special": false,
"text": " is" "text": " is"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.37402344, "logprob": -0.013366699,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 27084, "id": 27084,
"logprob": -1.4511719, "logprob": -0.6953125,
"special": false, "special": false,
"text": " subset" "text": " subset"
}, },
{ {
"id": 315, "id": 315,
"logprob": -0.02909851, "logprob": -0.0004863739,
"special": false, "special": false,
"text": " of" "text": " of"
}, },
{ {
"id": 5780, "id": 5780,
"logprob": -0.25854492, "logprob": -0.02709961,
"special": false, "special": false,
"text": " machine" "text": " machine"
}, },
{ {
"id": 6975, "id": 6975,
"logprob": -0.0061798096, "logprob": -0.00022506714,
"special": false, "special": false,
"text": " learning" "text": " learning"
}, },
{ {
"id": 430, "id": 430,
"logprob": -1.3046875, "logprob": -0.19726562,
"special": false, "special": false,
"text": " that" "text": " that"
}, },
{ {
"id": 374, "id": 18065,
"logprob": -1.5537109, "logprob": -0.77734375,
"special": false, "special": false,
"text": " is" "text": " involves"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": " Deep learning is a subset of machine learning that is" "generated_text": " Deep learning is a subset of machine learning that involves"
} }
] ]

View File

@ -4,7 +4,9 @@ import pytest
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def flash_llama_fp8_kv_cache_handle(launcher): def flash_llama_fp8_kv_cache_handle(launcher):
with launcher( with launcher(
"meta-llama/Meta-Llama-3-8B", num_shard=2, kv_cache_dtype="fp8_e5m2" "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
num_shard=2,
kv_cache_dtype="fp8_e4m3fn",
) as handle: ) as handle:
yield handle yield handle
@ -25,7 +27,7 @@ async def test_flash_llama_fp8_kv_cache(flash_llama_fp8_kv_cache, response_snaps
assert ( assert (
response.generated_text response.generated_text
== " Deep learning is a subset of machine learning that is" == " Deep learning is a subset of machine learning that involves"
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10
assert response == response_snapshot assert response == response_snapshot
@ -69,7 +71,7 @@ async def test_flash_llama_fp8_kv_cache_load(
assert len(responses) == 4 assert len(responses) == 4
assert ( assert (
responses[0].generated_text responses[0].generated_text
== " Deep learning is a subset of machine learning that is" == " Deep learning is a subset of machine learning that involves"
) )
assert all( assert all(
[r.generated_text == responses[0].generated_text for r in responses] [r.generated_text == responses[0].generated_text for r in responses]