Test Marlin MoE with desc_act=true (#2622)

Update the Mixtral GPTQ test to use a model with `desc_act=true` and
`group_size!=-1` to ensure that we are checking activation
sorting/non-full K (with tensor parallelism). The `desc_act=false` case
is already checked by the Mixtral AWQ test.
This commit is contained in:
Daniël de Kok 2024-10-21 12:50:35 +02:00 committed by GitHub
parent 5e0fb46821
commit 7f54b7336a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 314 additions and 212 deletions

View File

@ -10,80 +10,95 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 3735, "id": 1824,
"logprob": -11.0078125, "logprob": -9.2890625,
"text": "Test" "text": "What"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -13.59375, "logprob": -1.1503906,
"text": "request" "text": "is"
},
{
"id": 3534,
"logprob": -9.5859375,
"text": "deep"
},
{
"id": 5168,
"logprob": -1.3945312,
"text": "learning"
},
{
"id": 28804,
"logprob": -0.4555664,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -1.7089844, "logprob": -0.6953125,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.68847656, "logprob": -0.4777832,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 28771, "id": 23229,
"logprob": -1.9394531, "logprob": -0.13256836,
"special": false, "special": false,
"text": "#" "text": "Deep"
}, },
{ {
"id": 3735, "id": 5168,
"logprob": -2.8808594, "logprob": -0.023849487,
"special": false, "special": false,
"text": " Test" "text": " learning"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -0.37280273, "logprob": -0.13977051,
"special": false, "special": false,
"text": " request" "text": " is"
}, },
{ {
"id": 13, "id": 264,
"logprob": -0.26098633, "logprob": -0.14489746,
"special": false, "special": false,
"text": "\n" "text": " a"
}, },
{ {
"id": 13, "id": 19804,
"logprob": -0.0017137527, "logprob": -0.63183594,
"special": false, "special": false,
"text": "\n" "text": " subset"
}, },
{ {
"id": 1064, "id": 302,
"logprob": -2.2695312, "logprob": -0.010314941,
"special": false, "special": false,
"text": "##" "text": " of"
}, },
{ {
"id": 3735, "id": 5599,
"logprob": -1.9238281, "logprob": -0.0635376,
"special": false, "special": false,
"text": " Test" "text": " machine"
}, },
{ {
"id": 2159, "id": 5168,
"logprob": -0.48828125, "logprob": -0.0028572083,
"special": false, "special": false,
"text": " request" "text": " learning"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\n\n# Test request\n\n## Test request" "generated_text": "\n\nDeep learning is a subset of machine learning"
} }

View File

@ -10,42 +10,28 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 3735, "id": 349,
"logprob": -11.0078125, "logprob": -12.0546875,
"text": "Test" "text": "is"
}, },
{ {
"id": 2159, "id": 3534,
"logprob": -13.59375, "logprob": -10.53125,
"text": "request" "text": "deep"
},
{
"id": 5168,
"logprob": -2.71875,
"text": "learning"
},
{
"id": 28804,
"logprob": -5.0078125,
"text": "?"
} }
], ],
"seed": 0, "seed": 0,
"tokens": [ "tokens": [
{
"id": 13,
"logprob": -0.34838867,
"special": false,
"text": "\n"
},
{
"id": 13940,
"logprob": -0.38916016,
"special": false,
"text": "``"
},
{
"id": 28832,
"logprob": 0.0,
"special": false,
"text": "`"
},
{
"id": 3371,
"logprob": -1.2529297,
"special": false,
"text": "json"
},
{ {
"id": 13, "id": 13,
"logprob": 0.0, "logprob": 0.0,
@ -53,37 +39,61 @@
"text": "\n" "text": "\n"
}, },
{ {
"id": 28751, "id": 23229,
"logprob": 0.0, "logprob": -0.18237305,
"special": false, "special": false,
"text": "{" "text": "Deep"
}, },
{ {
"id": 13, "id": 17504,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": "\n" "text": " Learning"
}, },
{ {
"id": 2287, "id": 349,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": " " "text": " is"
}, },
{ {
"id": 345, "id": 264,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": " \"" "text": " a"
}, },
{ {
"id": 3134, "id": 19804,
"logprob": -0.640625, "logprob": 0.0,
"special": false, "special": false,
"text": "request" "text": " subset"
},
{
"id": 302,
"logprob": 0.0,
"special": false,
"text": " of"
},
{
"id": 13253,
"logprob": -0.6040039,
"special": false,
"text": " Machine"
},
{
"id": 17504,
"logprob": 0.0,
"special": false,
"text": " Learning"
},
{
"id": 28725,
"logprob": -0.11621094,
"special": false,
"text": ","
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "Test request\n```json\n{\n \"request" "generated_text": "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
} }

View File

@ -11,82 +11,97 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 3735, "id": 1824,
"logprob": -11.0078125, "logprob": -9.2890625,
"text": "Test" "text": "What"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -13.59375, "logprob": -1.1503906,
"text": "request" "text": "is"
},
{
"id": 3534,
"logprob": -9.5859375,
"text": "deep"
},
{
"id": 5168,
"logprob": -1.3945312,
"text": "learning"
},
{
"id": 28804,
"logprob": -0.4555664,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -1.7089844, "logprob": -0.6953125,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.68847656, "logprob": -0.4777832,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 28771, "id": 23229,
"logprob": -1.9394531, "logprob": -0.13232422,
"special": false, "special": false,
"text": "#" "text": "Deep"
}, },
{ {
"id": 3735, "id": 5168,
"logprob": -2.8828125, "logprob": -0.023834229,
"special": false, "special": false,
"text": " Test" "text": " learning"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -0.37329102, "logprob": -0.13977051,
"special": false, "special": false,
"text": " request" "text": " is"
}, },
{ {
"id": 13, "id": 264,
"logprob": -0.2602539, "logprob": -0.14416504,
"special": false, "special": false,
"text": "\n" "text": " a"
}, },
{ {
"id": 13, "id": 19804,
"logprob": -0.0017185211, "logprob": -0.63183594,
"special": false, "special": false,
"text": "\n" "text": " subset"
}, },
{ {
"id": 1064, "id": 302,
"logprob": -2.2753906, "logprob": -0.010223389,
"special": false, "special": false,
"text": "##" "text": " of"
}, },
{ {
"id": 3735, "id": 5599,
"logprob": -1.9316406, "logprob": -0.064208984,
"special": false, "special": false,
"text": " Test" "text": " machine"
}, },
{ {
"id": 2159, "id": 5168,
"logprob": -0.48217773, "logprob": -0.0028266907,
"special": false, "special": false,
"text": " request" "text": " learning"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\n\n# Test request\n\n## Test request" "generated_text": "\n\nDeep learning is a subset of machine learning"
}, },
{ {
"details": { "details": {
@ -100,82 +115,97 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 3735, "id": 1824,
"logprob": -11.0078125, "logprob": -9.2890625,
"text": "Test" "text": "What"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -13.59375, "logprob": -1.1425781,
"text": "request" "text": "is"
},
{
"id": 3534,
"logprob": -9.59375,
"text": "deep"
},
{
"id": 5168,
"logprob": -1.390625,
"text": "learning"
},
{
"id": 28804,
"logprob": -0.45532227,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -1.7089844, "logprob": -0.6953125,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.68847656, "logprob": -0.48339844,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 28771, "id": 23229,
"logprob": -1.9394531, "logprob": -0.13256836,
"special": false, "special": false,
"text": "#" "text": "Deep"
}, },
{ {
"id": 3735, "id": 5168,
"logprob": -2.8828125, "logprob": -0.02420044,
"special": false, "special": false,
"text": " Test" "text": " learning"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -0.37329102, "logprob": -0.13977051,
"special": false, "special": false,
"text": " request" "text": " is"
}, },
{ {
"id": 13, "id": 264,
"logprob": -0.2602539, "logprob": -0.14501953,
"special": false, "special": false,
"text": "\n" "text": " a"
}, },
{ {
"id": 13, "id": 19804,
"logprob": -0.0017185211, "logprob": -0.63134766,
"special": false, "special": false,
"text": "\n" "text": " subset"
}, },
{ {
"id": 1064, "id": 302,
"logprob": -2.2753906, "logprob": -0.010223389,
"special": false, "special": false,
"text": "##" "text": " of"
}, },
{ {
"id": 3735, "id": 5599,
"logprob": -1.9316406, "logprob": -0.06427002,
"special": false, "special": false,
"text": " Test" "text": " machine"
}, },
{ {
"id": 2159, "id": 5168,
"logprob": -0.48217773, "logprob": -0.002817154,
"special": false, "special": false,
"text": " request" "text": " learning"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\n\n# Test request\n\n## Test request" "generated_text": "\n\nDeep learning is a subset of machine learning"
}, },
{ {
"details": { "details": {
@ -189,82 +219,97 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 3735, "id": 1824,
"logprob": -11.0078125, "logprob": -9.2890625,
"text": "Test" "text": "What"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -13.59375, "logprob": -1.1425781,
"text": "request" "text": "is"
},
{
"id": 3534,
"logprob": -9.59375,
"text": "deep"
},
{
"id": 5168,
"logprob": -1.390625,
"text": "learning"
},
{
"id": 28804,
"logprob": -0.45532227,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -1.7089844, "logprob": -0.6953125,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.68847656, "logprob": -0.48339844,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 28771, "id": 23229,
"logprob": -1.9394531, "logprob": -0.13256836,
"special": false, "special": false,
"text": "#" "text": "Deep"
}, },
{ {
"id": 3735, "id": 5168,
"logprob": -2.8828125, "logprob": -0.02420044,
"special": false, "special": false,
"text": " Test" "text": " learning"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -0.37329102, "logprob": -0.13977051,
"special": false, "special": false,
"text": " request" "text": " is"
}, },
{ {
"id": 13, "id": 264,
"logprob": -0.2602539, "logprob": -0.14501953,
"special": false, "special": false,
"text": "\n" "text": " a"
}, },
{ {
"id": 13, "id": 19804,
"logprob": -0.0017185211, "logprob": -0.63134766,
"special": false, "special": false,
"text": "\n" "text": " subset"
}, },
{ {
"id": 1064, "id": 302,
"logprob": -2.2753906, "logprob": -0.010223389,
"special": false, "special": false,
"text": "##" "text": " of"
}, },
{ {
"id": 3735, "id": 5599,
"logprob": -1.9316406, "logprob": -0.06427002,
"special": false, "special": false,
"text": " Test" "text": " machine"
}, },
{ {
"id": 2159, "id": 5168,
"logprob": -0.48217773, "logprob": -0.002817154,
"special": false, "special": false,
"text": " request" "text": " learning"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\n\n# Test request\n\n## Test request" "generated_text": "\n\nDeep learning is a subset of machine learning"
}, },
{ {
"details": { "details": {
@ -278,81 +323,96 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 3735, "id": 1824,
"logprob": -11.0078125, "logprob": -9.2890625,
"text": "Test" "text": "What"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -13.59375, "logprob": -1.1425781,
"text": "request" "text": "is"
},
{
"id": 3534,
"logprob": -9.59375,
"text": "deep"
},
{
"id": 5168,
"logprob": -1.390625,
"text": "learning"
},
{
"id": 28804,
"logprob": -0.45532227,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -1.7089844, "logprob": -0.6953125,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.68847656, "logprob": -0.48339844,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 28771, "id": 23229,
"logprob": -1.9394531, "logprob": -0.13256836,
"special": false, "special": false,
"text": "#" "text": "Deep"
}, },
{ {
"id": 3735, "id": 5168,
"logprob": -2.8828125, "logprob": -0.02420044,
"special": false, "special": false,
"text": " Test" "text": " learning"
}, },
{ {
"id": 2159, "id": 349,
"logprob": -0.37329102, "logprob": -0.13977051,
"special": false, "special": false,
"text": " request" "text": " is"
}, },
{ {
"id": 13, "id": 264,
"logprob": -0.2602539, "logprob": -0.14501953,
"special": false, "special": false,
"text": "\n" "text": " a"
}, },
{ {
"id": 13, "id": 19804,
"logprob": -0.0017185211, "logprob": -0.63134766,
"special": false, "special": false,
"text": "\n" "text": " subset"
}, },
{ {
"id": 1064, "id": 302,
"logprob": -2.2753906, "logprob": -0.010223389,
"special": false, "special": false,
"text": "##" "text": " of"
}, },
{ {
"id": 3735, "id": 5599,
"logprob": -1.9316406, "logprob": -0.06427002,
"special": false, "special": false,
"text": " Test" "text": " machine"
}, },
{ {
"id": 2159, "id": 5168,
"logprob": -0.48217773, "logprob": -0.002817154,
"special": false, "special": false,
"text": " request" "text": " learning"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\n\n# Test request\n\n## Test request" "generated_text": "\n\nDeep learning is a subset of machine learning"
} }
] ]

View File

@ -3,7 +3,11 @@ import pytest
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def flash_mixtral_gptq_handle(launcher): def flash_mixtral_gptq_handle(launcher):
with launcher("TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ", num_shard=2) as handle: with launcher(
"TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ",
revision="gptq-4bit-128g-actorder_True",
num_shard=2,
) as handle:
yield handle yield handle
@ -16,7 +20,12 @@ async def flash_mixtral_gptq(flash_mixtral_gptq_handle):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot): async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot):
response = await flash_mixtral_gptq.generate( response = await flash_mixtral_gptq.generate(
"Test request", max_new_tokens=10, decoder_input_details=True "What is deep learning?", max_new_tokens=10, decoder_input_details=True
)
assert response.details.generated_tokens == 10
assert (
response.generated_text == "\n\nDeep learning is a subset of machine learning"
) )
assert response == response_snapshot assert response == response_snapshot
@ -25,7 +34,7 @@ async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapshot): async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapshot):
response = await flash_mixtral_gptq.generate( response = await flash_mixtral_gptq.generate(
"Test request", "What is deep learning?",
max_new_tokens=10, max_new_tokens=10,
repetition_penalty=1.2, repetition_penalty=1.2,
return_full_text=True, return_full_text=True,
@ -41,6 +50,10 @@ async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapsh
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10
assert (
response.generated_text
== "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
)
assert response == response_snapshot assert response == response_snapshot
@ -49,10 +62,14 @@ async def test_flash_mixtral_gptq_load(
flash_mixtral_gptq, generate_load, response_snapshot flash_mixtral_gptq, generate_load, response_snapshot
): ):
responses = await generate_load( responses = await generate_load(
flash_mixtral_gptq, "Test request", max_new_tokens=10, n=4 flash_mixtral_gptq, "What is deep learning?", max_new_tokens=10, n=4
) )
assert len(responses) == 4 assert len(responses) == 4
assert (
responses[0].generated_text
== "\n\nDeep learning is a subset of machine learning"
)
assert all( assert all(
[r.generated_text == responses[0].generated_text for r in responses] [r.generated_text == responses[0].generated_text for r in responses]
), f"{[r.generated_text for r in responses]}" ), f"{[r.generated_text for r in responses]}"