diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json index 993bdaddc..b835bf075 100644 --- a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json +++ b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json @@ -10,80 +10,95 @@ "text": "" }, { - "id": 3735, - "logprob": -11.0078125, - "text": "Test" + "id": 1824, + "logprob": -9.2890625, + "text": "What" }, { - "id": 2159, - "logprob": -13.59375, - "text": "request" + "id": 349, + "logprob": -1.1503906, + "text": "is" + }, + { + "id": 3534, + "logprob": -9.5859375, + "text": "deep" + }, + { + "id": 5168, + "logprob": -1.3945312, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.4555664, + "text": "?" } ], "seed": null, "tokens": [ { "id": 13, - "logprob": -1.7089844, + "logprob": -0.6953125, "special": false, "text": "\n" }, { "id": 13, - "logprob": -0.68847656, + "logprob": -0.4777832, "special": false, "text": "\n" }, { - "id": 28771, - "logprob": -1.9394531, + "id": 23229, + "logprob": -0.13256836, "special": false, - "text": "#" + "text": "Deep" }, { - "id": 3735, - "logprob": -2.8808594, + "id": 5168, + "logprob": -0.023849487, "special": false, - "text": " Test" + "text": " learning" }, { - "id": 2159, - "logprob": -0.37280273, + "id": 349, + "logprob": -0.13977051, "special": false, - "text": " request" + "text": " is" }, { - "id": 13, - "logprob": -0.26098633, + "id": 264, + "logprob": -0.14489746, "special": false, - "text": "\n" + "text": " a" }, { - "id": 13, - "logprob": -0.0017137527, + "id": 19804, + "logprob": -0.63183594, "special": false, - "text": "\n" + "text": " subset" }, { - "id": 1064, - "logprob": -2.2695312, + "id": 302, + "logprob": -0.010314941, "special": false, - "text": "##" + "text": " of" }, { - "id": 3735, - "logprob": -1.9238281, + "id": 5599, + "logprob": -0.0635376, "special": false, - "text": " Test" + "text": " machine" }, { - "id": 2159, - "logprob": -0.48828125, + "id": 5168, + "logprob": -0.0028572083, "special": false, - "text": " request" + "text": " learning" } ], "top_tokens": null }, - "generated_text": "\n\n# Test request\n\n## Test request" + "generated_text": "\n\nDeep learning is a subset of machine learning" } diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json index 94411eefb..77c885990 100644 --- a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json +++ b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json @@ -10,42 +10,28 @@ "text": "" }, { - "id": 3735, - "logprob": -11.0078125, - "text": "Test" + "id": 349, + "logprob": -12.0546875, + "text": "is" }, { - "id": 2159, - "logprob": -13.59375, - "text": "request" + "id": 3534, + "logprob": -10.53125, + "text": "deep" + }, + { + "id": 5168, + "logprob": -2.71875, + "text": "learning" + }, + { + "id": 28804, + "logprob": -5.0078125, + "text": "?" } ], "seed": 0, "tokens": [ - { - "id": 13, - "logprob": -0.34838867, - "special": false, - "text": "\n" - }, - { - "id": 13940, - "logprob": -0.38916016, - "special": false, - "text": "``" - }, - { - "id": 28832, - "logprob": 0.0, - "special": false, - "text": "`" - }, - { - "id": 3371, - "logprob": -1.2529297, - "special": false, - "text": "json" - }, { "id": 13, "logprob": 0.0, @@ -53,37 +39,61 @@ "text": "\n" }, { - "id": 28751, - "logprob": 0.0, + "id": 23229, + "logprob": -0.18237305, "special": false, - "text": "{" + "text": "Deep" }, { - "id": 13, + "id": 17504, "logprob": 0.0, "special": false, - "text": "\n" + "text": " Learning" }, { - "id": 2287, + "id": 349, "logprob": 0.0, "special": false, - "text": " " + "text": " is" }, { - "id": 345, + "id": 264, "logprob": 0.0, "special": false, - "text": " \"" + "text": " a" }, { - "id": 3134, - "logprob": -0.640625, + "id": 19804, + "logprob": 0.0, "special": false, - "text": "request" + "text": " subset" + }, + { + "id": 302, + "logprob": 0.0, + "special": false, + "text": " of" + }, + { + "id": 13253, + "logprob": -0.6040039, + "special": false, + "text": " Machine" + }, + { + "id": 17504, + "logprob": 0.0, + "special": false, + "text": " Learning" + }, + { + "id": 28725, + "logprob": -0.11621094, + "special": false, + "text": "," } ], "top_tokens": null }, - "generated_text": "Test request\n```json\n{\n \"request" + "generated_text": "What is deep learning?\nDeep Learning is a subset of Machine Learning," } diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json index 19e306a38..959e3c557 100644 --- a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json +++ b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json @@ -11,82 +11,97 @@ "text": "" }, { - "id": 3735, - "logprob": -11.0078125, - "text": "Test" + "id": 1824, + "logprob": -9.2890625, + "text": "What" }, { - "id": 2159, - "logprob": -13.59375, - "text": "request" + "id": 349, + "logprob": -1.1503906, + "text": "is" + }, + { + "id": 3534, + "logprob": -9.5859375, + "text": "deep" + }, + { + "id": 5168, + "logprob": -1.3945312, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.4555664, + "text": "?" } ], "seed": null, "tokens": [ { "id": 13, - "logprob": -1.7089844, + "logprob": -0.6953125, "special": false, "text": "\n" }, { "id": 13, - "logprob": -0.68847656, + "logprob": -0.4777832, "special": false, "text": "\n" }, { - "id": 28771, - "logprob": -1.9394531, + "id": 23229, + "logprob": -0.13232422, "special": false, - "text": "#" + "text": "Deep" }, { - "id": 3735, - "logprob": -2.8828125, + "id": 5168, + "logprob": -0.023834229, "special": false, - "text": " Test" + "text": " learning" }, { - "id": 2159, - "logprob": -0.37329102, + "id": 349, + "logprob": -0.13977051, "special": false, - "text": " request" + "text": " is" }, { - "id": 13, - "logprob": -0.2602539, + "id": 264, + "logprob": -0.14416504, "special": false, - "text": "\n" + "text": " a" }, { - "id": 13, - "logprob": -0.0017185211, + "id": 19804, + "logprob": -0.63183594, "special": false, - "text": "\n" + "text": " subset" }, { - "id": 1064, - "logprob": -2.2753906, + "id": 302, + "logprob": -0.010223389, "special": false, - "text": "##" + "text": " of" }, { - "id": 3735, - "logprob": -1.9316406, + "id": 5599, + "logprob": -0.064208984, "special": false, - "text": " Test" + "text": " machine" }, { - "id": 2159, - "logprob": -0.48217773, + "id": 5168, + "logprob": -0.0028266907, "special": false, - "text": " request" + "text": " learning" } ], "top_tokens": null }, - "generated_text": "\n\n# Test request\n\n## Test request" + "generated_text": "\n\nDeep learning is a subset of machine learning" }, { "details": { @@ -100,82 +115,97 @@ "text": "" }, { - "id": 3735, - "logprob": -11.0078125, - "text": "Test" + "id": 1824, + "logprob": -9.2890625, + "text": "What" }, { - "id": 2159, - "logprob": -13.59375, - "text": "request" + "id": 349, + "logprob": -1.1425781, + "text": "is" + }, + { + "id": 3534, + "logprob": -9.59375, + "text": "deep" + }, + { + "id": 5168, + "logprob": -1.390625, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.45532227, + "text": "?" } ], "seed": null, "tokens": [ { "id": 13, - "logprob": -1.7089844, + "logprob": -0.6953125, "special": false, "text": "\n" }, { "id": 13, - "logprob": -0.68847656, + "logprob": -0.48339844, "special": false, "text": "\n" }, { - "id": 28771, - "logprob": -1.9394531, + "id": 23229, + "logprob": -0.13256836, "special": false, - "text": "#" + "text": "Deep" }, { - "id": 3735, - "logprob": -2.8828125, + "id": 5168, + "logprob": -0.02420044, "special": false, - "text": " Test" + "text": " learning" }, { - "id": 2159, - "logprob": -0.37329102, + "id": 349, + "logprob": -0.13977051, "special": false, - "text": " request" + "text": " is" }, { - "id": 13, - "logprob": -0.2602539, + "id": 264, + "logprob": -0.14501953, "special": false, - "text": "\n" + "text": " a" }, { - "id": 13, - "logprob": -0.0017185211, + "id": 19804, + "logprob": -0.63134766, "special": false, - "text": "\n" + "text": " subset" }, { - "id": 1064, - "logprob": -2.2753906, + "id": 302, + "logprob": -0.010223389, "special": false, - "text": "##" + "text": " of" }, { - "id": 3735, - "logprob": -1.9316406, + "id": 5599, + "logprob": -0.06427002, "special": false, - "text": " Test" + "text": " machine" }, { - "id": 2159, - "logprob": -0.48217773, + "id": 5168, + "logprob": -0.002817154, "special": false, - "text": " request" + "text": " learning" } ], "top_tokens": null }, - "generated_text": "\n\n# Test request\n\n## Test request" + "generated_text": "\n\nDeep learning is a subset of machine learning" }, { "details": { @@ -189,82 +219,97 @@ "text": "" }, { - "id": 3735, - "logprob": -11.0078125, - "text": "Test" + "id": 1824, + "logprob": -9.2890625, + "text": "What" }, { - "id": 2159, - "logprob": -13.59375, - "text": "request" + "id": 349, + "logprob": -1.1425781, + "text": "is" + }, + { + "id": 3534, + "logprob": -9.59375, + "text": "deep" + }, + { + "id": 5168, + "logprob": -1.390625, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.45532227, + "text": "?" } ], "seed": null, "tokens": [ { "id": 13, - "logprob": -1.7089844, + "logprob": -0.6953125, "special": false, "text": "\n" }, { "id": 13, - "logprob": -0.68847656, + "logprob": -0.48339844, "special": false, "text": "\n" }, { - "id": 28771, - "logprob": -1.9394531, + "id": 23229, + "logprob": -0.13256836, "special": false, - "text": "#" + "text": "Deep" }, { - "id": 3735, - "logprob": -2.8828125, + "id": 5168, + "logprob": -0.02420044, "special": false, - "text": " Test" + "text": " learning" }, { - "id": 2159, - "logprob": -0.37329102, + "id": 349, + "logprob": -0.13977051, "special": false, - "text": " request" + "text": " is" }, { - "id": 13, - "logprob": -0.2602539, + "id": 264, + "logprob": -0.14501953, "special": false, - "text": "\n" + "text": " a" }, { - "id": 13, - "logprob": -0.0017185211, + "id": 19804, + "logprob": -0.63134766, "special": false, - "text": "\n" + "text": " subset" }, { - "id": 1064, - "logprob": -2.2753906, + "id": 302, + "logprob": -0.010223389, "special": false, - "text": "##" + "text": " of" }, { - "id": 3735, - "logprob": -1.9316406, + "id": 5599, + "logprob": -0.06427002, "special": false, - "text": " Test" + "text": " machine" }, { - "id": 2159, - "logprob": -0.48217773, + "id": 5168, + "logprob": -0.002817154, "special": false, - "text": " request" + "text": " learning" } ], "top_tokens": null }, - "generated_text": "\n\n# Test request\n\n## Test request" + "generated_text": "\n\nDeep learning is a subset of machine learning" }, { "details": { @@ -278,81 +323,96 @@ "text": "" }, { - "id": 3735, - "logprob": -11.0078125, - "text": "Test" + "id": 1824, + "logprob": -9.2890625, + "text": "What" }, { - "id": 2159, - "logprob": -13.59375, - "text": "request" + "id": 349, + "logprob": -1.1425781, + "text": "is" + }, + { + "id": 3534, + "logprob": -9.59375, + "text": "deep" + }, + { + "id": 5168, + "logprob": -1.390625, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.45532227, + "text": "?" } ], "seed": null, "tokens": [ { "id": 13, - "logprob": -1.7089844, + "logprob": -0.6953125, "special": false, "text": "\n" }, { "id": 13, - "logprob": -0.68847656, + "logprob": -0.48339844, "special": false, "text": "\n" }, { - "id": 28771, - "logprob": -1.9394531, + "id": 23229, + "logprob": -0.13256836, "special": false, - "text": "#" + "text": "Deep" }, { - "id": 3735, - "logprob": -2.8828125, + "id": 5168, + "logprob": -0.02420044, "special": false, - "text": " Test" + "text": " learning" }, { - "id": 2159, - "logprob": -0.37329102, + "id": 349, + "logprob": -0.13977051, "special": false, - "text": " request" + "text": " is" }, { - "id": 13, - "logprob": -0.2602539, + "id": 264, + "logprob": -0.14501953, "special": false, - "text": "\n" + "text": " a" }, { - "id": 13, - "logprob": -0.0017185211, + "id": 19804, + "logprob": -0.63134766, "special": false, - "text": "\n" + "text": " subset" }, { - "id": 1064, - "logprob": -2.2753906, + "id": 302, + "logprob": -0.010223389, "special": false, - "text": "##" + "text": " of" }, { - "id": 3735, - "logprob": -1.9316406, + "id": 5599, + "logprob": -0.06427002, "special": false, - "text": " Test" + "text": " machine" }, { - "id": 2159, - "logprob": -0.48217773, + "id": 5168, + "logprob": -0.002817154, "special": false, - "text": " request" + "text": " learning" } ], "top_tokens": null }, - "generated_text": "\n\n# Test request\n\n## Test request" + "generated_text": "\n\nDeep learning is a subset of machine learning" } ] diff --git a/integration-tests/models/test_flash_mixtral_gptq.py b/integration-tests/models/test_flash_mixtral_gptq.py index eb8806284..47bcb0bf3 100644 --- a/integration-tests/models/test_flash_mixtral_gptq.py +++ b/integration-tests/models/test_flash_mixtral_gptq.py @@ -3,7 +3,11 @@ import pytest @pytest.fixture(scope="module") def flash_mixtral_gptq_handle(launcher): - with launcher("TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ", num_shard=2) as handle: + with launcher( + "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ", + revision="gptq-4bit-128g-actorder_True", + num_shard=2, + ) as handle: yield handle @@ -16,7 +20,12 @@ async def flash_mixtral_gptq(flash_mixtral_gptq_handle): @pytest.mark.asyncio async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot): response = await flash_mixtral_gptq.generate( - "Test request", max_new_tokens=10, decoder_input_details=True + "What is deep learning?", max_new_tokens=10, decoder_input_details=True + ) + + assert response.details.generated_tokens == 10 + assert ( + response.generated_text == "\n\nDeep learning is a subset of machine learning" ) assert response == response_snapshot @@ -25,7 +34,7 @@ async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot): @pytest.mark.asyncio async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapshot): response = await flash_mixtral_gptq.generate( - "Test request", + "What is deep learning?", max_new_tokens=10, repetition_penalty=1.2, return_full_text=True, @@ -41,6 +50,10 @@ async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapsh ) assert response.details.generated_tokens == 10 + assert ( + response.generated_text + == "What is deep learning?\nDeep Learning is a subset of Machine Learning," + ) assert response == response_snapshot @@ -49,10 +62,14 @@ async def test_flash_mixtral_gptq_load( flash_mixtral_gptq, generate_load, response_snapshot ): responses = await generate_load( - flash_mixtral_gptq, "Test request", max_new_tokens=10, n=4 + flash_mixtral_gptq, "What is deep learning?", max_new_tokens=10, n=4 ) assert len(responses) == 4 + assert ( + responses[0].generated_text + == "\n\nDeep learning is a subset of machine learning" + ) assert all( [r.generated_text == responses[0].generated_text for r in responses] ), f"{[r.generated_text for r in responses]}"