diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json index 787704ce..66ddbaef 100644 --- a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json +++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json @@ -7,157 +7,107 @@ { "id": 50278, "logprob": null, - "text": "<|prompter|>" + "text": "<|USER|>" }, { "id": 1276, - "logprob": -8.03125, + "logprob": -4.5546875, "text": "What" }, - { - "id": 310, - "logprob": -5.421875, - "text": " is" - }, - { - "id": 247, - "logprob": -2.1601562, - "text": " a" - }, - { - "id": 1167, - "logprob": -5.4609375, - "text": " mem" - }, - { - "id": 70, - "logprob": -0.005657196, - "text": "e" - }, - { - "id": 13, - "logprob": -7.28125, - "text": "," - }, - { - "id": 285, - "logprob": -0.2980957, - "text": " and" - }, - { - "id": 752, - "logprob": -2.1679688, - "text": " what" - }, { "id": 434, - "logprob": -5.6210938, + "logprob": -4.234375, "text": "'s" }, { - "id": 253, - "logprob": -0.81103516, - "text": " the" + "id": 634, + "logprob": -5.1054688, + "text": " your" }, { - "id": 2892, - "logprob": -6.6640625, - "text": " history" + "id": 12315, + "logprob": -9.953125, + "text": " mood" }, { - "id": 3212, - "logprob": -2.265625, - "text": " behind" - }, - { - "id": 436, - "logprob": -11.5078125, - "text": " this" - }, - { - "id": 3159, - "logprob": -2.1582031, - "text": " word" + "id": 3063, + "logprob": -4.0820312, + "text": " today" }, { "id": 32, - "logprob": -0.008720398, + "logprob": -0.15148926, "text": "?" 
}, { - "id": 0, - "logprob": -2.4726562, - "text": "<|endoftext|>" - }, - { - "id": 50281, - "logprob": -18.265625, - "text": "<|assistant|>" + "id": 50279, + "logprob": -0.27026367, + "text": "<|ASSISTANT|>" } ], "seed": null, "tokens": [ { - "id": 510, - "logprob": -0.63183594, + "id": 42, + "logprob": -0.88378906, "special": false, - "text": "The" + "text": "I" }, { - "id": 3159, - "logprob": -0.5390625, + "id": 1353, + "logprob": -0.94921875, "special": false, - "text": " word" + "text": "'m" }, { - "id": 346, - "logprob": -0.045684814, + "id": 417, + "logprob": -2.2402344, "special": false, - "text": " \"" + "text": " not" }, { - "id": 6441, - "logprob": -0.002090454, + "id": 2119, + "logprob": -0.3725586, "special": false, - "text": "mem" + "text": " sure" }, { - "id": 70, - "logprob": -1.3589859e-05, + "id": 13, + "logprob": -1.078125, "special": false, - "text": "e" + "text": "," }, { - "id": 3, - "logprob": -0.0009455681, + "id": 534, + "logprob": -0.67822266, "special": false, - "text": "\"" + "text": " which" }, { - "id": 369, - "logprob": -0.088012695, + "id": 310, + "logprob": -1.3837891, "special": false, - "text": " was" + "text": " is" }, { - "id": 806, - "logprob": -0.12585449, + "id": 253, + "logprob": -1.7050781, "special": false, - "text": " first" + "text": " the" }, { - "id": 908, - "logprob": -0.017196655, + "id": 1682, + "logprob": -0.052001953, "special": false, - "text": " used" + "text": " best" }, { - "id": 275, - "logprob": -0.49731445, + "id": 1039, + "logprob": -2.0390625, "special": false, - "text": " in" + "text": " way" } ] }, - "generated_text": "The word \"meme\" was first used in" + "generated_text": "I'm not sure, which is the best way" } diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json index 47d6a77e..5ef6b3a2 100644 --- 
a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json +++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json @@ -8,159 +8,109 @@ { "id": 50278, "logprob": null, - "text": "<|prompter|>" + "text": "<|USER|>" }, { "id": 1276, - "logprob": -8.03125, + "logprob": -4.5546875, "text": "What" }, - { - "id": 310, - "logprob": -5.421875, - "text": " is" - }, - { - "id": 247, - "logprob": -2.1601562, - "text": " a" - }, - { - "id": 1167, - "logprob": -5.4609375, - "text": " mem" - }, - { - "id": 70, - "logprob": -0.005657196, - "text": "e" - }, - { - "id": 13, - "logprob": -7.28125, - "text": "," - }, - { - "id": 285, - "logprob": -0.2980957, - "text": " and" - }, - { - "id": 752, - "logprob": -2.1679688, - "text": " what" - }, { "id": 434, - "logprob": -5.6210938, + "logprob": -4.234375, "text": "'s" }, { - "id": 253, - "logprob": -0.81103516, - "text": " the" + "id": 634, + "logprob": -5.21875, + "text": " your" }, { - "id": 2892, - "logprob": -6.6640625, - "text": " history" + "id": 12315, + "logprob": -9.9375, + "text": " mood" }, { - "id": 3212, - "logprob": -2.265625, - "text": " behind" - }, - { - "id": 436, - "logprob": -11.5078125, - "text": " this" - }, - { - "id": 3159, - "logprob": -2.1582031, - "text": " word" + "id": 3063, + "logprob": -4.1015625, + "text": " today" }, { "id": 32, - "logprob": -0.008720398, + "logprob": -0.15319824, "text": "?" 
}, { - "id": 0, - "logprob": -2.4726562, - "text": "<|endoftext|>" - }, - { - "id": 50281, - "logprob": -18.265625, - "text": "<|assistant|>" + "id": 50279, + "logprob": -0.2614746, + "text": "<|ASSISTANT|>" } ], "seed": null, "tokens": [ { - "id": 510, - "logprob": -0.63183594, + "id": 42, + "logprob": -0.8886719, "special": false, - "text": "The" + "text": "I" }, { - "id": 3159, - "logprob": -0.5488281, + "id": 1353, + "logprob": -0.98046875, "special": false, - "text": " word" + "text": "'m" }, { - "id": 346, - "logprob": -0.045684814, + "id": 417, + "logprob": -2.2265625, "special": false, - "text": " \"" + "text": " not" }, { - "id": 6441, - "logprob": -0.00207901, + "id": 2119, + "logprob": -0.3479004, "special": false, - "text": "mem" + "text": " sure" }, { - "id": 70, - "logprob": -1.335144e-05, + "id": 13, + "logprob": -1.0117188, "special": false, - "text": "e" + "text": "," }, { - "id": 3, - "logprob": -0.00097227097, + "id": 534, + "logprob": -0.67871094, "special": false, - "text": "\"" + "text": " which" }, { - "id": 369, - "logprob": -0.0892334, + "id": 310, + "logprob": -1.421875, "special": false, - "text": " was" + "text": " is" }, { - "id": 806, - "logprob": -0.12463379, + "id": 253, + "logprob": -1.7382812, "special": false, - "text": " first" + "text": " the" }, { - "id": 908, - "logprob": -0.01737976, + "id": 1682, + "logprob": -0.051330566, "special": false, - "text": " used" + "text": " best" }, { - "id": 275, - "logprob": -0.50341797, + "id": 1039, + "logprob": -2.0390625, "special": false, - "text": " in" + "text": " way" } ] }, - "generated_text": "The word \"meme\" was first used in" + "generated_text": "I'm not sure, which is the best way" }, { "details": { @@ -171,159 +121,109 @@ { "id": 50278, "logprob": null, - "text": "<|prompter|>" + "text": "<|USER|>" }, { "id": 1276, - "logprob": -8.03125, + "logprob": -4.5546875, "text": "What" }, - { - "id": 310, - "logprob": -5.421875, - "text": " is" - }, - { - "id": 247, - "logprob": 
-2.1601562, - "text": " a" - }, - { - "id": 1167, - "logprob": -5.4609375, - "text": " mem" - }, - { - "id": 70, - "logprob": -0.005657196, - "text": "e" - }, - { - "id": 13, - "logprob": -7.28125, - "text": "," - }, - { - "id": 285, - "logprob": -0.2980957, - "text": " and" - }, - { - "id": 752, - "logprob": -2.1679688, - "text": " what" - }, { "id": 434, - "logprob": -5.6210938, + "logprob": -4.234375, "text": "'s" }, { - "id": 253, - "logprob": -0.81103516, - "text": " the" + "id": 634, + "logprob": -5.1054688, + "text": " your" }, { - "id": 2892, - "logprob": -6.6640625, - "text": " history" + "id": 12315, + "logprob": -9.953125, + "text": " mood" }, { - "id": 3212, - "logprob": -2.265625, - "text": " behind" - }, - { - "id": 436, - "logprob": -11.5078125, - "text": " this" - }, - { - "id": 3159, - "logprob": -2.1582031, - "text": " word" + "id": 3063, + "logprob": -4.0820312, + "text": " today" }, { "id": 32, - "logprob": -0.008720398, + "logprob": -0.15148926, "text": "?" }, { - "id": 0, - "logprob": -2.4726562, - "text": "<|endoftext|>" - }, - { - "id": 50281, - "logprob": -18.265625, - "text": "<|assistant|>" + "id": 50279, + "logprob": -0.27026367, + "text": "<|ASSISTANT|>" } ], "seed": null, "tokens": [ { - "id": 510, - "logprob": -0.63183594, + "id": 42, + "logprob": -0.88378906, "special": false, - "text": "The" + "text": "I" }, { - "id": 3159, - "logprob": -0.5488281, + "id": 1353, + "logprob": -0.9819336, "special": false, - "text": " word" + "text": "'m" }, { - "id": 346, - "logprob": -0.045684814, + "id": 417, + "logprob": -2.2421875, "special": false, - "text": " \"" + "text": " not" }, { - "id": 6441, - "logprob": -0.00207901, + "id": 2119, + "logprob": -0.3474121, "special": false, - "text": "mem" + "text": " sure" }, { - "id": 70, - "logprob": -1.335144e-05, + "id": 13, + "logprob": -1.078125, "special": false, - "text": "e" + "text": "," }, { - "id": 3, - "logprob": -0.00097227097, + "id": 534, + "logprob": -0.69140625, "special": false, - 
"text": "\"" + "text": " which" }, { - "id": 369, - "logprob": -0.0892334, + "id": 310, + "logprob": -1.4072266, "special": false, - "text": " was" + "text": " is" }, { - "id": 806, - "logprob": -0.12463379, + "id": 253, + "logprob": -1.7041016, "special": false, - "text": " first" + "text": " the" }, { - "id": 908, - "logprob": -0.01737976, + "id": 1682, + "logprob": -0.053375244, "special": false, - "text": " used" + "text": " best" }, { - "id": 275, - "logprob": -0.50341797, + "id": 1039, + "logprob": -2.0351562, "special": false, - "text": " in" + "text": " way" } ] }, - "generated_text": "The word \"meme\" was first used in" + "generated_text": "I'm not sure, which is the best way" }, { "details": { @@ -334,159 +234,109 @@ { "id": 50278, "logprob": null, - "text": "<|prompter|>" + "text": "<|USER|>" }, { "id": 1276, - "logprob": -8.03125, + "logprob": -4.5546875, "text": "What" }, - { - "id": 310, - "logprob": -5.421875, - "text": " is" - }, - { - "id": 247, - "logprob": -2.1601562, - "text": " a" - }, - { - "id": 1167, - "logprob": -5.4609375, - "text": " mem" - }, - { - "id": 70, - "logprob": -0.005657196, - "text": "e" - }, - { - "id": 13, - "logprob": -7.28125, - "text": "," - }, - { - "id": 285, - "logprob": -0.2980957, - "text": " and" - }, - { - "id": 752, - "logprob": -2.1679688, - "text": " what" - }, { "id": 434, - "logprob": -5.6210938, + "logprob": -4.234375, "text": "'s" }, { - "id": 253, - "logprob": -0.81103516, - "text": " the" + "id": 634, + "logprob": -5.21875, + "text": " your" }, { - "id": 2892, - "logprob": -6.6640625, - "text": " history" + "id": 12315, + "logprob": -9.9375, + "text": " mood" }, { - "id": 3212, - "logprob": -2.265625, - "text": " behind" - }, - { - "id": 436, - "logprob": -11.5078125, - "text": " this" - }, - { - "id": 3159, - "logprob": -2.1582031, - "text": " word" + "id": 3063, + "logprob": -4.1015625, + "text": " today" }, { "id": 32, - "logprob": -0.008720398, + "logprob": -0.15319824, "text": "?" 
}, { - "id": 0, - "logprob": -2.4726562, - "text": "<|endoftext|>" - }, - { - "id": 50281, - "logprob": -18.265625, - "text": "<|assistant|>" + "id": 50279, + "logprob": -0.2614746, + "text": "<|ASSISTANT|>" } ], "seed": null, "tokens": [ { - "id": 510, - "logprob": -0.63183594, + "id": 42, + "logprob": -0.8886719, "special": false, - "text": "The" + "text": "I" }, { - "id": 3159, - "logprob": -0.5488281, + "id": 1353, + "logprob": -0.98046875, "special": false, - "text": " word" + "text": "'m" }, { - "id": 346, - "logprob": -0.045684814, + "id": 417, + "logprob": -2.2265625, "special": false, - "text": " \"" + "text": " not" }, { - "id": 6441, - "logprob": -0.00207901, + "id": 2119, + "logprob": -0.3479004, "special": false, - "text": "mem" + "text": " sure" }, { - "id": 70, - "logprob": -1.335144e-05, + "id": 13, + "logprob": -1.0117188, "special": false, - "text": "e" + "text": "," }, { - "id": 3, - "logprob": -0.00097227097, + "id": 534, + "logprob": -0.67871094, "special": false, - "text": "\"" + "text": " which" }, { - "id": 369, - "logprob": -0.0892334, + "id": 310, + "logprob": -1.421875, "special": false, - "text": " was" + "text": " is" }, { - "id": 806, - "logprob": -0.12463379, + "id": 253, + "logprob": -1.7382812, "special": false, - "text": " first" + "text": " the" }, { - "id": 908, - "logprob": -0.01737976, + "id": 1682, + "logprob": -0.051330566, "special": false, - "text": " used" + "text": " best" }, { - "id": 275, - "logprob": -0.50341797, + "id": 1039, + "logprob": -2.0390625, "special": false, - "text": " in" + "text": " way" } ] }, - "generated_text": "The word \"meme\" was first used in" + "generated_text": "I'm not sure, which is the best way" }, { "details": { @@ -497,158 +347,108 @@ { "id": 50278, "logprob": null, - "text": "<|prompter|>" + "text": "<|USER|>" }, { "id": 1276, - "logprob": -8.03125, + "logprob": -4.5546875, "text": "What" }, - { - "id": 310, - "logprob": -5.421875, - "text": " is" - }, - { - "id": 247, - "logprob": 
-2.1601562, - "text": " a" - }, - { - "id": 1167, - "logprob": -5.4609375, - "text": " mem" - }, - { - "id": 70, - "logprob": -0.005657196, - "text": "e" - }, - { - "id": 13, - "logprob": -7.28125, - "text": "," - }, - { - "id": 285, - "logprob": -0.2980957, - "text": " and" - }, - { - "id": 752, - "logprob": -2.1679688, - "text": " what" - }, { "id": 434, - "logprob": -5.6210938, + "logprob": -4.234375, "text": "'s" }, { - "id": 253, - "logprob": -0.81103516, - "text": " the" + "id": 634, + "logprob": -5.21875, + "text": " your" }, { - "id": 2892, - "logprob": -6.6640625, - "text": " history" + "id": 12315, + "logprob": -9.9375, + "text": " mood" }, { - "id": 3212, - "logprob": -2.265625, - "text": " behind" - }, - { - "id": 436, - "logprob": -11.5078125, - "text": " this" - }, - { - "id": 3159, - "logprob": -2.1582031, - "text": " word" + "id": 3063, + "logprob": -4.1015625, + "text": " today" }, { "id": 32, - "logprob": -0.008720398, + "logprob": -0.15319824, "text": "?" }, { - "id": 0, - "logprob": -2.4726562, - "text": "<|endoftext|>" - }, - { - "id": 50281, - "logprob": -18.265625, - "text": "<|assistant|>" + "id": 50279, + "logprob": -0.2614746, + "text": "<|ASSISTANT|>" } ], "seed": null, "tokens": [ { - "id": 510, - "logprob": -0.63183594, + "id": 42, + "logprob": -0.8886719, "special": false, - "text": "The" + "text": "I" }, { - "id": 3159, - "logprob": -0.5488281, + "id": 1353, + "logprob": -0.98046875, "special": false, - "text": " word" + "text": "'m" }, { - "id": 346, - "logprob": -0.045684814, + "id": 417, + "logprob": -2.2265625, "special": false, - "text": " \"" + "text": " not" }, { - "id": 6441, - "logprob": -0.00207901, + "id": 2119, + "logprob": -0.3479004, "special": false, - "text": "mem" + "text": " sure" }, { - "id": 70, - "logprob": -1.335144e-05, + "id": 13, + "logprob": -1.0117188, "special": false, - "text": "e" + "text": "," }, { - "id": 3, - "logprob": -0.00097227097, + "id": 534, + "logprob": -0.67871094, "special": false, - "text": 
"\"" + "text": " which" }, { - "id": 369, - "logprob": -0.0892334, + "id": 310, + "logprob": -1.421875, "special": false, - "text": " was" + "text": " is" }, { - "id": 806, - "logprob": -0.12463379, + "id": 253, + "logprob": -1.7382812, "special": false, - "text": " first" + "text": " the" }, { - "id": 908, - "logprob": -0.01737976, + "id": 1682, + "logprob": -0.051330566, "special": false, - "text": " used" + "text": " best" }, { - "id": 275, - "logprob": -0.50341797, + "id": 1039, + "logprob": -2.0390625, "special": false, - "text": " in" + "text": " way" } ] }, - "generated_text": "The word \"meme\" was first used in" + "generated_text": "I'm not sure, which is the best way" } ] diff --git a/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json new file mode 100644 index 00000000..787704ce --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json @@ -0,0 +1,163 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + 
"id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5390625, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.002090454, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.3589859e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.0009455681, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.088012695, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12585449, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.017196655, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.49731445, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" +} diff --git a/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json new file mode 100644 index 00000000..47d6a77e --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json @@ -0,0 +1,654 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": 
-2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + 
"generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" 
+ }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 
3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": 
-2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + } +] diff --git a/integration-tests/models/test_flash_neox.py b/integration-tests/models/test_flash_neox.py index 56cbf270..ca5b33c1 100644 --- a/integration-tests/models/test_flash_neox.py +++ b/integration-tests/models/test_flash_neox.py @@ -3,7 +3,7 @@ import pytest @pytest.fixture(scope="module") def flash_neox_handle(launcher): - with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as 
handle: + with launcher("stabilityai/stablelm-tuned-alpha-3b", num_shard=1) as handle: yield handle @@ -16,7 +16,7 @@ async def flash_neox(flash_neox_handle): @pytest.mark.asyncio async def test_flash_neox(flash_neox, response_snapshot): response = await flash_neox.generate( - "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>", + "<|USER|>What's your mood today?<|ASSISTANT|>", max_new_tokens=10, ) @@ -28,12 +28,15 @@ async def test_flash_neox(flash_neox, response_snapshot): async def test_flash_neox_load(flash_neox, generate_load, response_snapshot): responses = await generate_load( flash_neox, - "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>", + "<|USER|>What's your mood today?<|ASSISTANT|>", max_new_tokens=10, n=4, ) - assert len(responses) == 4 - assert all([r.generated_text == responses[0].generated_text for r in responses]) + generated_texts = [r.generated_text for r in responses] + + assert len(generated_texts) == 4 + # All concurrent requests must yield the same text ("assert x, y" would only test x). + assert all([text == generated_texts[0] for text in generated_texts]) assert responses == response_snapshot diff --git a/integration-tests/models/test_flash_neox_sharded.py b/integration-tests/models/test_flash_neox_sharded.py new file mode 100644 index 00000000..513aeaaf --- /dev/null +++ b/integration-tests/models/test_flash_neox_sharded.py @@ -0,0 +1,39 @@ +import pytest + + +@pytest.fixture(scope="module") +def flash_neox_sharded_handle(launcher): + with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_neox_sharded(flash_neox_sharded_handle): + await flash_neox_sharded_handle.health(240) + return flash_neox_sharded_handle.client + + +@pytest.mark.asyncio +async def test_flash_neox(flash_neox_sharded, response_snapshot): + response = await flash_neox_sharded.generate( + "<|prompter|>What is a meme, and what's the history behind this
word?<|endoftext|><|assistant|>", + max_new_tokens=10, + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.asyncio +async def test_flash_neox_load(flash_neox_sharded, generate_load, response_snapshot): + responses = await generate_load( + flash_neox_sharded, + "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>", + max_new_tokens=10, + n=4, + ) + + assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) + + assert responses == response_snapshot diff --git a/integration-tests/pytest.ini b/integration-tests/pytest.ini new file mode 100644 index 00000000..485e6017 --- /dev/null +++ b/integration-tests/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +asyncio_mode = auto +markers = + private: marks tests as requiring an admin hf token (deselect with '-m "not private"') \ No newline at end of file diff --git a/server/pyproject.toml b/server/pyproject.toml index 22aa41db..b8e15230 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -35,6 +35,9 @@ bnb = ["bitsandbytes"] grpcio-tools = "^1.51.1" pytest = "^7.3.0" +[tool.pytest.ini_options] +markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"] + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py index 2c6b8da6..b7834157 100644 --- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py @@ -362,7 +362,7 @@ class FlashGPTNeoXModel(FlashGPTNeoXPreTrainedModel): pretrained_model_name_or_path, load_in_8bit=False, *model_args, **kwargs ) - model.post_load_weights(load_in_8bit) + model.post_load_weights("bitsandbytes" 
if load_in_8bit else None) return model def forward( @@ -466,7 +466,7 @@ class FlashGPTNeoXForCausalLM(FlashGPTNeoXPreTrainedModel): model = super(FlashGPTNeoXForCausalLM, cls).from_pretrained( pretrained_model_name_or_path, load_in_8bit=False, *model_args, **kwargs ) - model.post_load_weights(load_in_8bit) + model.post_load_weights("bitsandbytes" if load_in_8bit else None) return model def forward( diff --git a/server/text_generation_server/models/flash_neox.py b/server/text_generation_server/models/flash_neox.py index cac40bab..8c1c1a00 100644 --- a/server/text_generation_server/models/flash_neox.py +++ b/server/text_generation_server/models/flash_neox.py @@ -88,7 +88,7 @@ class FlashNeoXSharded(FlashNeoX): def load_weights( model, filenames: List[str], - quantize: bool, + quantize: Optional[str], device: torch.device, dtype: torch.dtype, rank: int, diff --git a/server/text_generation_server/models/flash_santacoder.py b/server/text_generation_server/models/flash_santacoder.py index 5dc31309..1fbaf252 100644 --- a/server/text_generation_server/models/flash_santacoder.py +++ b/server/text_generation_server/models/flash_santacoder.py @@ -80,7 +80,7 @@ class FlashSantacoder(FlashCausalLM): def load_weights( model: FlashSantacoderForCausalLM, filenames: List[Path], - quantize: bool, + quantize: Optional[str], device: torch.device, dtype: torch.dtype, transpose: bool, diff --git a/server/text_generation_server/models/opt.py b/server/text_generation_server/models/opt.py index fdae795b..8c676a51 100644 --- a/server/text_generation_server/models/opt.py +++ b/server/text_generation_server/models/opt.py @@ -101,7 +101,7 @@ class OPTSharded(OPT): def load_weights( model, filenames: List[str], - quantize: bool, + quantize: Optional[str], device: torch.device, dtype: torch.dtype, rank: int,