mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-07-30 11:50:19 +00:00
feat: refactor position ids in warmup and bump tests
This commit is contained in:
parent
cf5c66043e
commit
7ab99bc6b3
@ -1,73 +1,469 @@
|
||||
{
|
||||
"details": {
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"finish_reason": "eos_token",
|
||||
"generated_tokens": 76,
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 18183,
|
||||
"logprob": -1.6669922,
|
||||
"logprob": -1.5195312,
|
||||
"special": false,
|
||||
"text": " Deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.08959961,
|
||||
"logprob": -0.06817627,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.14685059,
|
||||
"logprob": -0.13122559,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -0.125,
|
||||
"logprob": -0.13415527,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 25993,
|
||||
"logprob": -0.81640625,
|
||||
"logprob": -0.8769531,
|
||||
"special": false,
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -0.0013418198,
|
||||
"logprob": -0.0011396408,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 5662,
|
||||
"logprob": -0.16027832,
|
||||
"logprob": -0.16442871,
|
||||
"special": false,
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.0016393661,
|
||||
"logprob": -0.0026416779,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 429,
|
||||
"logprob": -0.4477539,
|
||||
"logprob": -0.48754883,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 5711,
|
||||
"logprob": -1.2802734,
|
||||
"logprob": -1.2294922,
|
||||
"special": false,
|
||||
"text": " uses"
|
||||
},
|
||||
{
|
||||
"id": 29728,
|
||||
"logprob": -0.66503906,
|
||||
"special": false,
|
||||
"text": " neural"
|
||||
},
|
||||
{
|
||||
"id": 14155,
|
||||
"logprob": -0.02960205,
|
||||
"special": false,
|
||||
"text": " networks"
|
||||
},
|
||||
{
|
||||
"id": 311,
|
||||
"logprob": -0.7236328,
|
||||
"special": false,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 3960,
|
||||
"logprob": -1.1914062,
|
||||
"special": false,
|
||||
"text": " learn"
|
||||
},
|
||||
{
|
||||
"id": 504,
|
||||
"logprob": -0.7089844,
|
||||
"special": false,
|
||||
"text": " from"
|
||||
},
|
||||
{
|
||||
"id": 821,
|
||||
"logprob": -0.7729492,
|
||||
"special": false,
|
||||
"text": " data"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.7836914,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 1084,
|
||||
"logprob": -0.9941406,
|
||||
"special": false,
|
||||
"text": " It"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.52441406,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -0.9511719,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 943,
|
||||
"logprob": -0.8642578,
|
||||
"special": false,
|
||||
"text": " type"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -0.00030231476,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 20443,
|
||||
"logprob": -0.14416504,
|
||||
"special": false,
|
||||
"text": " artificial"
|
||||
},
|
||||
{
|
||||
"id": 11229,
|
||||
"logprob": -0.013824463,
|
||||
"special": false,
|
||||
"text": " intelligence"
|
||||
},
|
||||
{
|
||||
"id": 429,
|
||||
"logprob": -0.18762207,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 646,
|
||||
"logprob": -1.0087891,
|
||||
"special": false,
|
||||
"text": " can"
|
||||
},
|
||||
{
|
||||
"id": 3960,
|
||||
"logprob": -0.90234375,
|
||||
"special": false,
|
||||
"text": " learn"
|
||||
},
|
||||
{
|
||||
"id": 504,
|
||||
"logprob": -0.54345703,
|
||||
"special": false,
|
||||
"text": " from"
|
||||
},
|
||||
{
|
||||
"id": 323,
|
||||
"logprob": -1.0400391,
|
||||
"special": false,
|
||||
"text": " and"
|
||||
},
|
||||
{
|
||||
"id": 1281,
|
||||
"logprob": -0.072509766,
|
||||
"special": false,
|
||||
"text": " make"
|
||||
},
|
||||
{
|
||||
"id": 19898,
|
||||
"logprob": -0.16516113,
|
||||
"special": false,
|
||||
"text": " predictions"
|
||||
},
|
||||
{
|
||||
"id": 389,
|
||||
"logprob": -0.4416504,
|
||||
"special": false,
|
||||
"text": " on"
|
||||
},
|
||||
{
|
||||
"id": 3460,
|
||||
"logprob": -0.5385742,
|
||||
"special": false,
|
||||
"text": " large"
|
||||
},
|
||||
{
|
||||
"id": 14713,
|
||||
"logprob": -0.4387207,
|
||||
"special": false,
|
||||
"text": " amounts"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -0.00015091896,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 821,
|
||||
"logprob": -0.061431885,
|
||||
"special": false,
|
||||
"text": " data"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.71875,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 18183,
|
||||
"logprob": -0.23632812,
|
||||
"special": false,
|
||||
"text": " Deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.0017204285,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -1.1738281,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 1483,
|
||||
"logprob": -0.61083984,
|
||||
"special": false,
|
||||
"text": " used"
|
||||
},
|
||||
{
|
||||
"id": 304,
|
||||
"logprob": -0.035003662,
|
||||
"special": false,
|
||||
"text": " in"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -0.118652344,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 8045,
|
||||
"logprob": -0.42016602,
|
||||
"special": false,
|
||||
"text": " variety"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -1.6212463e-05,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 8357,
|
||||
"logprob": -0.1315918,
|
||||
"special": false,
|
||||
"text": " applications"
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"logprob": -0.12915039,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 2670,
|
||||
"logprob": -0.12463379,
|
||||
"special": false,
|
||||
"text": " including"
|
||||
},
|
||||
{
|
||||
"id": 2168,
|
||||
"logprob": -0.37402344,
|
||||
"special": false,
|
||||
"text": " image"
|
||||
},
|
||||
{
|
||||
"id": 323,
|
||||
"logprob": -0.1451416,
|
||||
"special": false,
|
||||
"text": " and"
|
||||
},
|
||||
{
|
||||
"id": 8806,
|
||||
"logprob": -0.028869629,
|
||||
"special": false,
|
||||
"text": " speech"
|
||||
},
|
||||
{
|
||||
"id": 17843,
|
||||
"logprob": -0.00024068356,
|
||||
"special": false,
|
||||
"text": " recognition"
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"logprob": -0.00031018257,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 5810,
|
||||
"logprob": -0.019821167,
|
||||
"special": false,
|
||||
"text": " natural"
|
||||
},
|
||||
{
|
||||
"id": 4128,
|
||||
"logprob": -0.00012528896,
|
||||
"special": false,
|
||||
"text": " language"
|
||||
},
|
||||
{
|
||||
"id": 8692,
|
||||
"logprob": -0.00089263916,
|
||||
"special": false,
|
||||
"text": " processing"
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"logprob": -0.00073862076,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 323,
|
||||
"logprob": -0.040161133,
|
||||
"special": false,
|
||||
"text": " and"
|
||||
},
|
||||
{
|
||||
"id": 38193,
|
||||
"logprob": -0.4519043,
|
||||
"special": false,
|
||||
"text": " autonomous"
|
||||
},
|
||||
{
|
||||
"id": 11474,
|
||||
"logprob": -0.39941406,
|
||||
"special": false,
|
||||
"text": " vehicles"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.21166992,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 1084,
|
||||
"logprob": -0.9082031,
|
||||
"special": false,
|
||||
"text": " It"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.44213867,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -1.2177734,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 18512,
|
||||
"logprob": -0.5205078,
|
||||
"special": false,
|
||||
"text": " rapidly"
|
||||
},
|
||||
{
|
||||
"id": 7826,
|
||||
"logprob": -0.15332031,
|
||||
"special": false,
|
||||
"text": " growing"
|
||||
},
|
||||
{
|
||||
"id": 2070,
|
||||
"logprob": -0.0039978027,
|
||||
"special": false,
|
||||
"text": " field"
|
||||
},
|
||||
{
|
||||
"id": 448,
|
||||
"logprob": -0.9091797,
|
||||
"special": false,
|
||||
"text": " with"
|
||||
},
|
||||
{
|
||||
"id": 1657,
|
||||
"logprob": -0.17114258,
|
||||
"special": false,
|
||||
"text": " many"
|
||||
},
|
||||
{
|
||||
"id": 4650,
|
||||
"logprob": -0.70703125,
|
||||
"special": false,
|
||||
"text": " potential"
|
||||
},
|
||||
{
|
||||
"id": 8357,
|
||||
"logprob": -0.025131226,
|
||||
"special": false,
|
||||
"text": " applications"
|
||||
},
|
||||
{
|
||||
"id": 304,
|
||||
"logprob": -0.6699219,
|
||||
"special": false,
|
||||
"text": " in"
|
||||
},
|
||||
{
|
||||
"id": 279,
|
||||
"logprob": -0.35205078,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -0.049194336,
|
||||
"special": false,
|
||||
"text": " future"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.21972656,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 151643,
|
||||
"logprob": -2.0019531,
|
||||
"special": true,
|
||||
"text": "<|endoftext|>"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " Deep learning is a subset of machine learning that uses"
|
||||
"generated_text": " Deep learning is a subset of machine learning that uses neural networks to learn from data. It is a type of artificial intelligence that can learn from and make predictions on large amounts of data. Deep learning is used in a variety of applications, including image and speech recognition, natural language processing, and autonomous vehicles. It is a rapidly growing field with many potential applications in the future."
|
||||
}
|
||||
|
@ -7,67 +7,67 @@
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 1939,
|
||||
"logprob": -2.2460938,
|
||||
"id": 5267,
|
||||
"logprob": -1.1464844,
|
||||
"special": false,
|
||||
"text": "?\n\n"
|
||||
"text": "?\n"
|
||||
},
|
||||
{
|
||||
"id": 33464,
|
||||
"logprob": 0.0,
|
||||
"logprob": -0.83203125,
|
||||
"special": false,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 20909,
|
||||
"logprob": -0.48608398,
|
||||
"logprob": -0.5625,
|
||||
"special": false,
|
||||
"text": " Learning"
|
||||
},
|
||||
{
|
||||
"id": 4102,
|
||||
"logprob": -2.265625,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 285,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 458,
|
||||
"logprob": -0.6328125,
|
||||
"special": false,
|
||||
"text": " an"
|
||||
},
|
||||
{
|
||||
"id": 20443,
|
||||
"logprob": -0.1796875,
|
||||
"special": false,
|
||||
"text": " artificial"
|
||||
},
|
||||
{
|
||||
"id": 11229,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": " intelligence"
|
||||
},
|
||||
{
|
||||
"id": 320,
|
||||
"logprob": -0.37695312,
|
||||
"logprob": -2.1464844,
|
||||
"special": false,
|
||||
"text": " ("
|
||||
},
|
||||
{
|
||||
"id": 15469,
|
||||
"id": 16524,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": "AI"
|
||||
"text": "DL"
|
||||
},
|
||||
{
|
||||
"id": 701,
|
||||
"logprob": -2.2089844,
|
||||
"special": false,
|
||||
"text": "),"
|
||||
},
|
||||
{
|
||||
"id": 476,
|
||||
"logprob": -0.27368164,
|
||||
"special": false,
|
||||
"text": " or"
|
||||
},
|
||||
{
|
||||
"id": 20443,
|
||||
"logprob": -0.09442139,
|
||||
"special": false,
|
||||
"text": " artificial"
|
||||
},
|
||||
{
|
||||
"id": 29728,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": " neural"
|
||||
},
|
||||
{
|
||||
"id": 14155,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": " networks"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "What is deep learning?\n\nDeep Learning is an artificial intelligence (AI"
|
||||
"generated_text": "What is deep learning?\nDeep Learning (DL), or artificial neural networks"
|
||||
}
|
||||
|
@ -9,61 +9,61 @@
|
||||
"tokens": [
|
||||
{
|
||||
"id": 18183,
|
||||
"logprob": -1.4912109,
|
||||
"logprob": -1.5195312,
|
||||
"special": false,
|
||||
"text": " Deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.075683594,
|
||||
"logprob": -0.06817627,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.12408447,
|
||||
"logprob": -0.13122559,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -0.12768555,
|
||||
"logprob": -0.13415527,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 25993,
|
||||
"logprob": -0.82128906,
|
||||
"logprob": -0.87353516,
|
||||
"special": false,
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -0.0012636185,
|
||||
"logprob": -0.0011396408,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 5662,
|
||||
"logprob": -0.12878418,
|
||||
"logprob": -0.16442871,
|
||||
"special": false,
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.0015888214,
|
||||
"logprob": -0.0026416779,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 429,
|
||||
"logprob": -0.49194336,
|
||||
"logprob": -0.48754883,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 5711,
|
||||
"logprob": -1.2626953,
|
||||
"logprob": -1.2294922,
|
||||
"special": false,
|
||||
"text": " uses"
|
||||
}
|
||||
@ -82,61 +82,61 @@
|
||||
"tokens": [
|
||||
{
|
||||
"id": 18183,
|
||||
"logprob": -1.4912109,
|
||||
"logprob": -1.5195312,
|
||||
"special": false,
|
||||
"text": " Deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.075683594,
|
||||
"logprob": -0.06817627,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.12408447,
|
||||
"logprob": -0.13122559,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -0.12768555,
|
||||
"logprob": -0.13415527,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 25993,
|
||||
"logprob": -0.82128906,
|
||||
"logprob": -0.87353516,
|
||||
"special": false,
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -0.0012636185,
|
||||
"logprob": -0.0011396408,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 5662,
|
||||
"logprob": -0.12878418,
|
||||
"logprob": -0.16442871,
|
||||
"special": false,
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.0015888214,
|
||||
"logprob": -0.0026416779,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 429,
|
||||
"logprob": -0.49194336,
|
||||
"logprob": -0.48754883,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 5711,
|
||||
"logprob": -1.2626953,
|
||||
"logprob": -1.2294922,
|
||||
"special": false,
|
||||
"text": " uses"
|
||||
}
|
||||
@ -155,61 +155,61 @@
|
||||
"tokens": [
|
||||
{
|
||||
"id": 18183,
|
||||
"logprob": -1.4912109,
|
||||
"logprob": -1.5195312,
|
||||
"special": false,
|
||||
"text": " Deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.075683594,
|
||||
"logprob": -0.06817627,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.12408447,
|
||||
"logprob": -0.13122559,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -0.12768555,
|
||||
"logprob": -0.13415527,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 25993,
|
||||
"logprob": -0.82128906,
|
||||
"logprob": -0.87353516,
|
||||
"special": false,
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -0.0012636185,
|
||||
"logprob": -0.0011396408,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 5662,
|
||||
"logprob": -0.12878418,
|
||||
"logprob": -0.16442871,
|
||||
"special": false,
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.0015888214,
|
||||
"logprob": -0.0026416779,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 429,
|
||||
"logprob": -0.49194336,
|
||||
"logprob": -0.48754883,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 5711,
|
||||
"logprob": -1.2626953,
|
||||
"logprob": -1.2294922,
|
||||
"special": false,
|
||||
"text": " uses"
|
||||
}
|
||||
@ -228,61 +228,61 @@
|
||||
"tokens": [
|
||||
{
|
||||
"id": 18183,
|
||||
"logprob": -1.4912109,
|
||||
"logprob": -1.5195312,
|
||||
"special": false,
|
||||
"text": " Deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.075683594,
|
||||
"logprob": -0.06817627,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.12408447,
|
||||
"logprob": -0.13122559,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 264,
|
||||
"logprob": -0.12768555,
|
||||
"logprob": -0.13415527,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 25993,
|
||||
"logprob": -0.82128906,
|
||||
"logprob": -0.87353516,
|
||||
"special": false,
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"logprob": -0.0012636185,
|
||||
"logprob": -0.0011396408,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 5662,
|
||||
"logprob": -0.12878418,
|
||||
"logprob": -0.16442871,
|
||||
"special": false,
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.0015888214,
|
||||
"logprob": -0.0026416779,
|
||||
"special": false,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 429,
|
||||
"logprob": -0.49194336,
|
||||
"logprob": -0.48754883,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 5711,
|
||||
"logprob": -1.2626953,
|
||||
"logprob": -1.2294922,
|
||||
"special": false,
|
||||
"text": " uses"
|
||||
}
|
||||
|
@ -27,15 +27,16 @@ async def test_compressed_tensors_w8a8_int_dynamic_weight(
|
||||
):
|
||||
response = await compressed_tensors_w8a8_int_dynamic_weight.generate(
|
||||
"What is deep learning?",
|
||||
max_new_tokens=10,
|
||||
# prefer a longer response than the default, allow the llm to end generation
|
||||
max_new_tokens=1000,
|
||||
decoder_input_details=True,
|
||||
)
|
||||
|
||||
assert (
|
||||
response.generated_text
|
||||
== " Deep learning is a subset of machine learning that uses"
|
||||
== " Deep learning is a subset of machine learning that uses neural networks to learn from data. It is a type of artificial intelligence that can learn from and make predictions on large amounts of data. Deep learning is used in a variety of applications, including image and speech recognition, natural language processing, and autonomous vehicles. It is a rapidly growing field with many potential applications in the future."
|
||||
)
|
||||
assert response.details.generated_tokens == 10
|
||||
assert response.details.generated_tokens == 76
|
||||
assert response == response_snapshot
|
||||
|
||||
|
||||
@ -64,7 +65,7 @@ async def test_compressed_tensors_w8a8_int_dynamic_weight_all_params(
|
||||
assert response.details.generated_tokens == 10
|
||||
assert (
|
||||
response.generated_text
|
||||
== "What is deep learning?\n\nDeep Learning is an artificial intelligence (AI"
|
||||
== "What is deep learning?\nDeep Learning (DL), or artificial neural networks"
|
||||
)
|
||||
assert response == response_snapshot
|
||||
|
||||
|
@ -1400,6 +1400,10 @@ class FlashCausalLM(Model):
|
||||
cache_lengths = [0] * bs
|
||||
if max_bs is None:
|
||||
input_ids = torch.zeros(bs, dtype=torch.int64, device=self.device)
|
||||
if hasattr(self.model, "get_position_ids"):
|
||||
# use model specific position ids for initialization
|
||||
position_ids = self.model.get_position_ids(input_ids)
|
||||
else:
|
||||
position_ids = torch.zeros(bs, dtype=torch.int32, device=self.device)
|
||||
slots = torch.arange(bs, dtype=torch.int64, device=self.device)
|
||||
input_lengths_tensor = (
|
||||
@ -1427,7 +1431,7 @@ class FlashCausalLM(Model):
|
||||
"Cuda graphs should be generated in decreasing order size to reduce VRAM usage"
|
||||
)
|
||||
input_ids = self.cuda_graphs[max_bs]["input_ids"][:bs]
|
||||
position_ids = self.cuda_graphs[max_bs]["position_ids"][:bs]
|
||||
position_ids = self.cuda_graphs[max_bs]["position_ids"][..., :bs]
|
||||
if ATTENTION == "flashinfer":
|
||||
block_tables = self.cuda_graphs[max_bs]["block_tables"][: bs * max_bt]
|
||||
else:
|
||||
@ -1456,14 +1460,6 @@ class FlashCausalLM(Model):
|
||||
else:
|
||||
state = None
|
||||
|
||||
if (
|
||||
hasattr(self.model, "config")
|
||||
and hasattr(self.model.config, "model_type")
|
||||
and self.model.config.model_type == "qwen2_vl"
|
||||
):
|
||||
if position_ids.dim() == 1:
|
||||
position_ids = self.model.get_position_ids(input_ids)
|
||||
|
||||
graph = torch.cuda.CUDAGraph()
|
||||
self.cuda_graphs[bs] = {
|
||||
"input_ids": input_ids,
|
||||
@ -1486,10 +1482,6 @@ class FlashCausalLM(Model):
|
||||
state=state,
|
||||
cache_lengths_tensor=cache_lengths_tensor,
|
||||
):
|
||||
# in the case of N dimensional position ids we need to slice the
|
||||
# position ids to match the input_ids size for cuda graphs warmup
|
||||
position_ids = position_ids[..., : input_ids.shape[0]]
|
||||
|
||||
seqlen = Seqlen(
|
||||
input_lengths=input_lengths_tensor,
|
||||
cache_lengths=cache_lengths_tensor,
|
||||
|
Loading…
Reference in New Issue
Block a user