diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json index 135c9deb..53055e42 100644 --- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json +++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json @@ -1,6 +1,7 @@ { "generated_text": "\n return sum(L) / len(L)\n\n\ndef geometric_mean(L", "details": { + "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 20, "seed": null, @@ -18,173 +19,173 @@ { "id": 21017, "text": "ometric", - "logprob": -9.09375 + "logprob": -9.0859375 }, { "id": 81, "text": "_", - "logprob": -0.25610352 + "logprob": -0.25878906 }, { "id": 6009, "text": "mean", - "logprob": -2.1835938 + "logprob": -2.2109375 }, { "id": 26, "text": "(", - "logprob": -0.29907227 + "logprob": -0.30371094 }, { "id": 62, "text": "L", - "logprob": -5.6015625 + "logprob": -5.6054688 }, { "id": 44, "text": ":", - "logprob": -3.0898438 + "logprob": -3.0722656 }, { "id": 1682, "text": " List", - "logprob": -0.68359375 + "logprob": -0.6879883 }, { "id": 77, "text": "[", - "logprob": -0.3869629 + "logprob": -0.38500977 }, { "id": 1808, "text": "float", - "logprob": -0.95751953 + "logprob": -0.984375 }, { "id": 10794, "text": "]):", - "logprob": -2.5507812 + "logprob": -2.5351562 } ], "tokens": [ { "id": 284, "text": "\n ", - "logprob": -1.171875, + "logprob": -1.1738281, "special": false }, { "id": 442, "text": " return", - "logprob": -0.9453125, + "logprob": -0.95947266, "special": false }, { "id": 3632, "text": " sum", - "logprob": -1.4013672, + "logprob": -1.4199219, "special": false }, { "id": 26, "text": "(", - "logprob": -0.083618164, + "logprob": -0.085876465, "special": false }, { "id": 62, "text": "L", - "logprob": -0.098083496, + "logprob": -0.09875488, "special": false }, { "id": 27, "text": ")", - "logprob": -0.30493164, + "logprob": -0.30517578, "special": false }, { "id": 517, "text": " /", - "logprob": -0.4074707, + "logprob": -0.42089844, "special": false }, { "id": 2069, "text": " len", - "logprob": -0.041015625, + "logprob": -0.042053223, "special": false }, { "id": 26, "text": "(", - "logprob": -0.0011863708, + "logprob": -0.0011806488, "special": false }, { "id": 62, "text": "L", - "logprob": -0.0005221367, + "logprob": -0.0005259514, "special": false }, { "id": 27, "text": ")", - "logprob": -0.0017499924, + "logprob": -0.0017633438, "special": false }, { "id": 478, "text": "\n\n", - "logprob": -0.69873047, + "logprob": -0.69189453, "special": false }, { "id": 203, "text": "\n", - "logprob": -0.041229248, + "logprob": -0.041870117, "special": false }, { "id": 589, "text": "def", - "logprob": -0.27929688, + "logprob": -0.27856445, "special": false }, { "id": 3226, "text": " ge", - "logprob": -1.7089844, + "logprob": -1.7255859, "special": false }, { "id": 21017, "text": "ometric", - "logprob": -0.010757446, + "logprob": -0.011291504, "special": false }, { "id": 81, "text": "_", - "logprob": -0.0090408325, + "logprob": -0.008430481, "special": false }, { "id": 6009, "text": "mean", - "logprob": -0.024932861, + "logprob": -0.025787354, "special": false }, { "id": 26, "text": "(", - "logprob": -0.06451416, + "logprob": -0.073913574, "special": false }, { "id": 62, "text": "L", - "logprob": -0.09832764, + "logprob": -0.09967041, "special": false } ] diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json index 911847c4..6f52706d 100644 --- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json +++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json @@ -1,6 +1,7 @@ { "generated_text": "\n return reduce(lambda x, y: x * y, L)\n\ndef geometric", "details": { + "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 20, "seed": 0, diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json index 09ee6f6e..5381ce5a 100644 --- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json +++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json @@ -1,770 +1,534 @@ [ { - "generated_text": "\n return sum(L) / len(L)\n\n\ndef geometric_mean(L", "details": { + "best_of_sequences": null, "finish_reason": "length", - "generated_tokens": 20, - "seed": null, + "generated_tokens": 10, "prefill": [ { "id": 589, - "text": "def", - "logprob": null + "logprob": null, + "text": "def" }, { "id": 3226, - "text": " ge", - "logprob": -9.03125 + "logprob": -9.0234375, + "text": " ge" }, { "id": 21017, - "text": "ometric", - "logprob": -9.0859375 + "logprob": -9.0859375, + "text": "ometric" }, { "id": 81, - "text": "_", - "logprob": -0.26123047 + "logprob": -0.25927734, + "text": "_" }, { "id": 6009, - "text": "mean", - "logprob": -2.2304688 + "logprob": -2.25, + "text": "mean" }, { "id": 26, - "text": "(", - "logprob": -0.29833984 + "logprob": -0.30126953, + "text": "(" }, { "id": 62, - "text": "L", - "logprob": -5.7382812 + "logprob": -5.7539062, + "text": "L" }, { "id": 44, - "text": ":", - "logprob": -3.0800781 + "logprob": -3.0878906, + "text": ":" }, { "id": 1682, - "text": " List", - "logprob": -0.6791992 + "logprob": -0.6845703, + "text": " List" }, { "id": 77, - "text": "[", - "logprob": -0.390625 + "logprob": -0.3918457, + "text": "[" }, { "id": 1808, - "text": "float", - "logprob": -0.89501953 + "logprob": -0.8798828, + "text": "float" }, { "id": 10794, - "text": "]):", - "logprob": -2.5234375 + "logprob": -2.4980469, + "text": "]):" } ], - "tokens": [ - { - "id": 284, - "text": "\n ", - "logprob": -1.1699219, - "special": false - }, - { - "id": 442, - "text": " return", - "logprob": -0.9116211, - "special": false - }, - { - "id": 3632, - "text": " sum", - "logprob": -1.3330078, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.080566406, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.09790039, - "special": false - }, - { - "id": 27, - "text": ")", - "logprob": -0.29101562, - "special": false - }, - { - "id": 517, - "text": " /", - "logprob": -0.33569336, - "special": false - }, - { - "id": 2069, - "text": " len", - "logprob": -0.0385437, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.0011758804, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.00049829483, - "special": false - }, - { - "id": 27, - "text": ")", - "logprob": -0.0016813278, - "special": false - }, - { - "id": 478, - "text": "\n\n", - "logprob": -0.6923828, - "special": false - }, - { - "id": 203, - "text": "\n", - "logprob": -0.040618896, - "special": false - }, - { - "id": 589, - "text": "def", - "logprob": -0.27856445, - "special": false - }, - { - "id": 3226, - "text": " ge", - "logprob": -1.5859375, - "special": false - }, - { - "id": 21017, - "text": "ometric", - "logprob": -0.009353638, - "special": false - }, - { - "id": 81, - "text": "_", - "logprob": -0.009216309, - "special": false - }, - { - "id": 6009, - "text": "mean", - "logprob": -0.023910522, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.053619385, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.09790039, - "special": false - } - ] - } - }, - { - "generated_text": "\n return sum(L) / len(L)\n\n\ndef geometric_mean(L", - "details": { - "finish_reason": "length", - "generated_tokens": 20, "seed": null, - "prefill": [ - { - "id": 589, - "text": "def", - "logprob": null - }, - { - "id": 3226, - "text": " ge", - "logprob": -9.03125 - }, - { - "id": 21017, - "text": "ometric", - "logprob": -9.0859375 - }, - { - "id": 81, - "text": "_", - "logprob": -0.26123047 - }, - { - "id": 6009, - "text": "mean", - "logprob": -2.2304688 - }, - { - "id": 26, - "text": "(", - "logprob": -0.29833984 - }, - { - "id": 62, - "text": "L", - "logprob": -5.7382812 - }, - { - "id": 44, - "text": ":", - "logprob": -3.0800781 - }, - { - "id": 1682, - "text": " List", - "logprob": -0.6791992 - }, - { - "id": 77, - "text": "[", - "logprob": -0.390625 - }, - { - "id": 1808, - "text": "float", - "logprob": -0.89501953 - }, - { - "id": 10794, - "text": "]):", - "logprob": -2.5234375 - } - ], "tokens": [ { "id": 284, - "text": "\n ", - "logprob": -1.1699219, - "special": false + "logprob": -1.1533203, + "special": false, + "text": "\n " }, { "id": 442, - "text": " return", - "logprob": -0.9116211, - "special": false + "logprob": -0.91796875, + "special": false, + "text": " return" }, { "id": 3632, - "text": " sum", - "logprob": -1.3310547, - "special": false + "logprob": -1.3291016, + "special": false, + "text": " sum" }, { "id": 26, - "text": "(", - "logprob": -0.080444336, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.09838867, - "special": false - }, - { - "id": 27, - "text": ")", - "logprob": -0.2919922, - "special": false - }, - { - "id": 517, - "text": " /", - "logprob": -0.33569336, - "special": false - }, - { - "id": 2069, - "text": " len", - "logprob": -0.038024902, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.0011835098, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.0004940033, - "special": false - }, - { - "id": 27, - "text": ")", - "logprob": -0.0016956329, - "special": false - }, - { - "id": 478, - "text": "\n\n", - "logprob": -0.6972656, - "special": false - }, - { - "id": 203, - "text": "\n", - "logprob": -0.040618896, - "special": false - }, - { - "id": 589, - "text": "def", - "logprob": -0.27807617, - "special": false - }, - { - "id": 3226, - "text": " ge", - "logprob": -1.5898438, - "special": false - }, - { - "id": 21017, - "text": "ometric", - "logprob": -0.009170532, - "special": false - }, - { - "id": 81, - "text": "_", - "logprob": -0.00932312, - "special": false - }, - { - "id": 6009, - "text": "mean", - "logprob": -0.023208618, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.05041504, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.09637451, - "special": false - } - ] - } - }, - { - "generated_text": "\n return sum(L) / len(L)\n\n\ndef geometric_mean(L", - "details": { - "finish_reason": "length", - "generated_tokens": 20, - "seed": null, - "prefill": [ - { - "id": 589, - "text": "def", - "logprob": null - }, - { - "id": 3226, - "text": " ge", - "logprob": -9.03125 - }, - { - "id": 21017, - "text": "ometric", - "logprob": -9.0859375 - }, - { - "id": 81, - "text": "_", - "logprob": -0.26123047 - }, - { - "id": 6009, - "text": "mean", - "logprob": -2.2304688 - }, - { - "id": 26, - "text": "(", - "logprob": -0.29833984 - }, - { - "id": 62, - "text": "L", - "logprob": -5.7382812 - }, - { - "id": 44, - "text": ":", - "logprob": -3.0800781 - }, - { - "id": 1682, - "text": " List", - "logprob": -0.6791992 - }, - { - "id": 77, - "text": "[", - "logprob": -0.390625 - }, - { - "id": 1808, - "text": "float", - "logprob": -0.89501953 - }, - { - "id": 10794, - "text": "]):", - "logprob": -2.5234375 - } - ], - "tokens": [ - { - "id": 284, - "text": "\n ", - "logprob": -1.1699219, - "special": false - }, - { - "id": 442, - "text": " return", - "logprob": -0.9169922, - "special": false - }, - { - "id": 3632, - "text": " sum", - "logprob": -1.34375, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.080566406, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.097839355, - "special": false - }, - { - "id": 27, - "text": ")", - "logprob": -0.29052734, - "special": false - }, - { - "id": 517, - "text": " /", - "logprob": -0.33984375, - "special": false - }, - { - "id": 2069, - "text": " len", - "logprob": -0.038330078, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.0011768341, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.00049591064, - "special": false - }, - { - "id": 27, - "text": ")", - "logprob": -0.0016946793, - "special": false - }, - { - "id": 478, - "text": "\n\n", - "logprob": -0.6982422, - "special": false - }, - { - "id": 203, - "text": "\n", - "logprob": -0.040649414, - "special": false - }, - { - "id": 589, - "text": "def", - "logprob": -0.27905273, - "special": false - }, - { - "id": 3226, - "text": " ge", - "logprob": -1.5888672, - "special": false - }, - { - "id": 21017, - "text": "ometric", - "logprob": -0.009300232, - "special": false - }, - { - "id": 81, - "text": "_", - "logprob": -0.009094238, - "special": false - }, - { - "id": 6009, - "text": "mean", - "logprob": -0.023910522, - "special": false - }, - { - "id": 26, - "text": "(", - "logprob": -0.05493164, - "special": false - }, - { - "id": 62, - "text": "L", - "logprob": -0.097229004, - "special": false - } - ] - } - }, - { - "generated_text": "\n return sum(L) / len(L)\n\n\ndef geometric_mean(L", - "details": { - "finish_reason": "length", - "generated_tokens": 20, - "seed": null, - "prefill": [ - { - "id": 589, - "text": "def", - "logprob": null - }, - { - "id": 3226, - "text": " ge", - "logprob": -9.03125 - }, - { - "id": 21017, - "text": "ometric", - "logprob": -9.0859375 - }, - { - "id": 81, - "text": "_", - "logprob": -0.26123047 - }, - { - "id": 6009, - "text": "mean", - "logprob": -2.2304688 - }, - { - "id": 26, - "text": "(", - "logprob": -0.29833984 - }, - { - "id": 62, - "text": "L", - "logprob": -5.7382812 - }, - { - "id": 44, - "text": ":", - "logprob": -3.0800781 - }, - { - "id": 1682, - "text": " List", - "logprob": -0.6791992 - }, - { - "id": 77, - "text": "[", - "logprob": -0.390625 - }, - { - "id": 1808, - "text": "float", - "logprob": -0.89501953 - }, - { - "id": 10794, - "text": "]):", - "logprob": -2.5234375 - } - ], - "tokens": [ - { - "id": 284, - "text": "\n ", - "logprob": -1.1699219, - "special": false - }, - { - "id": 442, - "text": " return", - "logprob": -0.9116211, - "special": false - }, - { - "id": 3632, - "text": " sum", - "logprob": -1.3349609, - "special": false - }, - { - "id": 26, - "text": "(", "logprob": -0.08062744, - "special": false + "special": false, + "text": "(" }, { "id": 62, - "text": "L", - "logprob": -0.0993042, - "special": false + "logprob": -0.097717285, + "special": false, + "text": "L" }, { "id": 27, - "text": ")", - "logprob": -0.2890625, - "special": false + "logprob": -0.29003906, + "special": false, + "text": ")" }, { "id": 517, - "text": " /", - "logprob": -0.33520508, - "special": false + "logprob": -0.34958984, + "special": false, + "text": " /" }, { "id": 2069, - "text": " len", - "logprob": -0.038757324, - "special": false + "logprob": -0.03829956, + "special": false, + "text": " len" }, { "id": 26, - "text": "(", - "logprob": -0.0011634827, - "special": false + "logprob": -0.0011987686, + "special": false, + "text": "(" }, { "id": 62, - "text": "L", - "logprob": -0.0004963875, - "special": false - }, - { - "id": 27, - "text": ")", - "logprob": -0.0016756058, - "special": false - }, - { - "id": 478, - "text": "\n\n", - "logprob": -0.69140625, - "special": false - }, - { - "id": 203, - "text": "\n", - "logprob": -0.040618896, - "special": false - }, + "logprob": -0.00050878525, + "special": false, + "text": "L" + } + ] + }, + "generated_text": "\n return sum(L) / len(L" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ { "id": 589, - "text": "def", - "logprob": -0.27929688, - "special": false + "logprob": null, + "text": "def" }, { "id": 3226, - "text": " ge", - "logprob": -1.5898438, - "special": false + "logprob": -9.0234375, + "text": " ge" }, { "id": 21017, - "text": "ometric", - "logprob": -0.00907135, - "special": false + "logprob": -9.0859375, + "text": "ometric" }, { "id": 81, - "text": "_", - "logprob": -0.008728027, - "special": false + "logprob": -0.25878906, + "text": "_" }, { "id": 6009, - "text": "mean", - "logprob": -0.024429321, - "special": false + "logprob": -2.2109375, + "text": "mean" }, { "id": 26, - "text": "(", - "logprob": -0.059814453, - "special": false + "logprob": -0.30371094, + "text": "(" }, { "id": 62, - "text": "L", - "logprob": -0.09899902, - "special": false + "logprob": -5.6054688, + "text": "L" + }, + { + "id": 44, + "logprob": -3.0722656, + "text": ":" + }, + { + "id": 1682, + "logprob": -0.6879883, + "text": " List" + }, + { + "id": 77, + "logprob": -0.38500977, + "text": "[" + }, + { + "id": 1808, + "logprob": -0.984375, + "text": "float" + }, + { + "id": 10794, + "logprob": -2.5351562, + "text": "]):" + } + ], + "seed": null, + "tokens": [ + { + "id": 284, + "logprob": -1.1738281, + "special": false, + "text": "\n " + }, + { + "id": 442, + "logprob": -0.9584961, + "special": false, + "text": " return" + }, + { + "id": 3632, + "logprob": -1.4169922, + "special": false, + "text": " sum" + }, + { + "id": 26, + "logprob": -0.085876465, + "special": false, + "text": "(" + }, + { + "id": 62, + "logprob": -0.0982666, + "special": false, + "text": "L" + }, + { + "id": 27, + "logprob": -0.3022461, + "special": false, + "text": ")" + }, + { + "id": 517, + "logprob": -0.40504883, + "special": false, + "text": " /" + }, + { + "id": 2069, + "logprob": -0.041656494, + "special": false, + "text": " len" + }, + { + "id": 26, + "logprob": -0.0011844635, + "special": false, + "text": "(" + }, + { + "id": 62, + "logprob": -0.0005264282, + "special": false, + "text": "L" } ] - } + }, + "generated_text": "\n return sum(L) / len(L" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 3226, + "logprob": -9.0234375, + "text": " ge" + }, + { + "id": 21017, + "logprob": -9.0859375, + "text": "ometric" + }, + { + "id": 81, + "logprob": -0.25927734, + "text": "_" + }, + { + "id": 6009, + "logprob": -2.25, + "text": "mean" + }, + { + "id": 26, + "logprob": -0.30126953, + "text": "(" + }, + { + "id": 62, + "logprob": -5.7539062, + "text": "L" + }, + { + "id": 44, + "logprob": -3.0878906, + "text": ":" + }, + { + "id": 1682, + "logprob": -0.6845703, + "text": " List" + }, + { + "id": 77, + "logprob": -0.3918457, + "text": "[" + }, + { + "id": 1808, + "logprob": -0.8798828, + "text": "float" + }, + { + "id": 10794, + "logprob": -2.4980469, + "text": "]):" + } + ], + "seed": null, + "tokens": [ + { + "id": 284, + "logprob": -1.1533203, + "special": false, + "text": "\n " + }, + { + "id": 442, + "logprob": -0.9165039, + "special": false, + "text": " return" + }, + { + "id": 3632, + "logprob": -1.328125, + "special": false, + "text": " sum" + }, + { + "id": 26, + "logprob": -0.07946777, + "special": false, + "text": "(" + }, + { + "id": 62, + "logprob": -0.09820557, + "special": false, + "text": "L" + }, + { + "id": 27, + "logprob": -0.28930664, + "special": false, + "text": ")" + }, + { + "id": 517, + "logprob": -0.34592773, + "special": false, + "text": " /" + }, + { + "id": 2069, + "logprob": -0.038330078, + "special": false, + "text": " len" + }, + { + "id": 26, + "logprob": -0.0011940002, + "special": false, + "text": "(" + }, + { + "id": 62, + "logprob": -0.00050878525, + "special": false, + "text": "L" + } + ] + }, + "generated_text": "\n return sum(L) / len(L" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 3226, + "logprob": -9.0234375, + "text": " ge" + }, + { + "id": 21017, + "logprob": -9.0859375, + "text": "ometric" + }, + { + "id": 81, + "logprob": -0.25927734, + "text": "_" + }, + { + "id": 6009, + "logprob": -2.25, + "text": "mean" + }, + { + "id": 26, + "logprob": -0.30126953, + "text": "(" + }, + { + "id": 62, + "logprob": -5.7539062, + "text": "L" + }, + { + "id": 44, + "logprob": -3.0878906, + "text": ":" + }, + { + "id": 1682, + "logprob": -0.6845703, + "text": " List" + }, + { + "id": 77, + "logprob": -0.3918457, + "text": "[" + }, + { + "id": 1808, + "logprob": -0.8798828, + "text": "float" + }, + { + "id": 10794, + "logprob": -2.4980469, + "text": "]):" + } + ], + "seed": null, + "tokens": [ + { + "id": 284, + "logprob": -1.1533203, + "special": false, + "text": "\n " + }, + { + "id": 442, + "logprob": -0.91259766, + "special": false, + "text": " return" + }, + { + "id": 3632, + "logprob": -1.3251953, + "special": false, + "text": " sum" + }, + { + "id": 26, + "logprob": -0.08062744, + "special": false, + "text": "(" + }, + { + "id": 62, + "logprob": -0.09906006, + "special": false, + "text": "L" + }, + { + "id": 27, + "logprob": -0.28979492, + "special": false, + "text": ")" + }, + { + "id": 517, + "logprob": -0.35958984, + "special": false, + "text": " /" + }, + { + "id": 2069, + "logprob": -0.038604736, + "special": false, + "text": " len" + }, + { + "id": 26, + "logprob": -0.0011901855, + "special": false, + "text": "(" + }, + { + "id": 62, + "logprob": -0.0005078316, + "special": false, + "text": "L" + } + ] + }, + "generated_text": "\n return sum(L) / len(L" } ] diff --git a/integration-tests/models/test_flash_starcoder_gptq.py b/integration-tests/models/test_flash_starcoder_gptq.py index 2295efbe..b6bed6a6 100644 --- a/integration-tests/models/test_flash_starcoder_gptq.py +++ b/integration-tests/models/test_flash_starcoder_gptq.py @@ -17,9 +17,8 @@ async def flash_starcoder_gptq(flash_starcoder_gptq_handle): @pytest.mark.private async def test_flash_starcoder_gptq(flash_starcoder_gptq, response_snapshot): response = await flash_starcoder_gptq.generate( - "def geometric_mean(L: List[float]):", max_new_tokens=20, decoder_input_details=True + "def geometric_mean(L: List[float]):", max_new_tokens=20, decoder_input_details=True, ) - assert response.details.generated_tokens == 20 assert response == response_snapshot @@ -35,7 +34,6 @@ async def test_flash_starcoder_gptq_default_params(flash_starcoder_gptq, respons decoder_input_details=True, seed=0, ) - assert response.details.generated_tokens == 20 assert response == response_snapshot @@ -43,7 +41,7 @@ async def test_flash_starcoder_gptq_default_params(flash_starcoder_gptq, respons @pytest.mark.asyncio @pytest.mark.private async def test_flash_starcoder_gptq_load(flash_starcoder_gptq, generate_load, response_snapshot): - responses = await generate_load(flash_starcoder_gptq, "def geometric_mean(L: List[float]):", max_new_tokens=20, n=4) + responses = await generate_load(flash_starcoder_gptq, "def geometric_mean(L: List[float]):", max_new_tokens=10, n=4) assert len(responses) == 4 assert all([r.generated_text == responses[0].generated_text for r in responses]) diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py index 28a25fd5..a0da1e20 100644 --- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py @@ -378,7 +378,7 @@ class Block(nn.Module): max_s, ): hidden_states, residual = self.ln_1(hidden_states, residual) - + hidden_states = self.attn( hidden_states, cu_seqlen_prefill,