mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
# What does this PR do? Reworked the loading logic. Idea is to use cleaner loading code: - Remove need for `no_init_weights` - Remove all weird `bnb_linear` and `load_weights` and `post_load_weights`. New code layout: - New class `Weights` in charge of handling loading the weights from multiple files into appropriate tensors (potentially sharded) - TP layers now are "shells", they contain the code to know what kind of sharding we need + eventual `all_reduce`. They do not inherit from linear, but they contain some kind of Linear instead - the contained linear can be either FastLinear, BnbLinear or GPTq Linear next. - All modeling code is explicitly made for sharding, process group is just no-ops for non sharded code (removes a lot of test cases)  --------- Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.taildb5d.ts.net> Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal> Co-authored-by: OlivierDehaene <olivier@huggingface.co> Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
114 lines
2.1 KiB
JSON
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      {
        "id": 50278,
        "logprob": null,
        "text": "<|USER|>"
      },
      {
        "id": 1276,
        "logprob": -4.5546875,
        "text": "What"
      },
      {
        "id": 434,
        "logprob": -4.1992188,
        "text": "'s"
      },
      {
        "id": 634,
        "logprob": -5.125,
        "text": " your"
      },
      {
        "id": 12315,
        "logprob": -9.8984375,
        "text": " mood"
      },
      {
        "id": 3063,
        "logprob": -4.0976562,
        "text": " today"
      },
      {
        "id": 32,
        "logprob": -0.14562988,
        "text": "?"
      },
      {
        "id": 50279,
        "logprob": -0.26733398,
        "text": "<|ASSISTANT|>"
      }
    ],
    "seed": null,
    "tokens": [
      {
        "id": 42,
        "logprob": -0.86279297,
        "special": false,
        "text": "I"
      },
      {
        "id": 1353,
        "logprob": -0.94921875,
        "special": false,
        "text": "'m"
      },
      {
        "id": 7016,
        "logprob": -2.1835938,
        "special": false,
        "text": " sorry"
      },
      {
        "id": 13,
        "logprob": -0.074035645,
        "special": false,
        "text": ","
      },
      {
        "id": 1394,
        "logprob": -0.86376953,
        "special": false,
        "text": "You"
      },
      {
        "id": 452,
        "logprob": -1.2070312,
        "special": false,
        "text": " have"
      },
      {
        "id": 247,
        "logprob": -1.4365234,
        "special": false,
        "text": " a"
      },
      {
        "id": 4327,
        "logprob": -1.109375,
        "special": false,
        "text": " choice"
      },
      {
        "id": 273,
        "logprob": -0.93408203,
        "special": false,
        "text": " of"
      },
      {
        "id": 752,
        "logprob": -1.8808594,
        "special": false,
        "text": " what"
      }
    ]
  },
  "generated_text": "I'm sorry,You have a choice of what"
}