Fix and test sharded version.

This commit is contained in:
Nicolas Patry 2023-09-25 10:21:46 +00:00
parent cbf047b4ae
commit 97292ec21c
8 changed files with 881 additions and 235 deletions

View File

@ -10,80 +10,95 @@
"text": "<s>"
},
{
"id": 4321,
"logprob": -8.515625,
"text": "Test"
"id": 1724,
"logprob": -7.703125,
"text": "What"
},
{
"id": 2009,
"logprob": -15.4140625,
"text": "request"
"id": 338,
"logprob": -1.4765625,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8583984,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{
"id": 13,
"logprob": -2.2597656,
"logprob": -1.9306641,
"special": false,
"text": "\n"
},
{
"id": 30166,
"logprob": -3.8671875,
"id": 5618,
"logprob": -2.4550781,
"special": false,
"text": ""
"text": "What"
},
{
"id": 30166,
"logprob": -1.0488281,
"id": 338,
"logprob": -0.5732422,
"special": false,
"text": ""
"text": " is"
},
{
"id": 30166,
"logprob": -0.24523926,
"id": 278,
"logprob": -1.5761719,
"special": false,
"text": ""
"text": " the"
},
{
"id": 30166,
"logprob": -0.07897949,
"id": 4328,
"logprob": -1.5888672,
"special": false,
"text": ""
"text": " difference"
},
{
"id": 30166,
"logprob": -0.023513794,
"id": 1546,
"logprob": -0.026504517,
"special": false,
"text": ""
"text": " between"
},
{
"id": 30166,
"logprob": -0.011444092,
"id": 21784,
"logprob": -1.4287109,
"special": false,
"text": ""
"text": " Deep"
},
{
"id": 30166,
"logprob": -0.008430481,
"id": 29257,
"logprob": -0.15856934,
"special": false,
"text": ""
"text": " Learning"
},
{
"id": 30166,
"logprob": -0.007648468,
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": ""
"text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "1\n"
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
}

View File

@ -10,80 +10,90 @@
"text": "<s>"
},
{
"id": 4321,
"logprob": -8.515625,
"text": "Test"
"id": 338,
"logprob": -9.0859375,
"text": "is"
},
{
"id": 2009,
"logprob": -15.4140625,
"text": "request"
"id": 21784,
"logprob": -10.90625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -2.65625,
"text": "Learning"
},
{
"id": 29973,
"logprob": -4.8085938,
"text": "?"
}
],
"seed": 0,
"tokens": [
{
"id": 29896,
"logprob": 0.0,
"special": false,
"text": "1"
},
{
"id": 13,
"logprob": -0.6254883,
"logprob": -0.19958496,
"special": false,
"text": "\n"
},
{
"id": 30166,
"id": 4013,
"logprob": -2.203125,
"special": false,
"text": "This"
},
{
"id": 1139,
"logprob": -0.23693848,
"special": false,
"text": " question"
},
{
"id": 756,
"logprob": 0.0,
"special": false,
"text": ""
"text": " has"
},
{
"id": 29918,
"logprob": -0.20141602,
"id": 1063,
"logprob": -0.076538086,
"special": false,
"text": "_"
"text": " been"
},
{
"id": 29906,
"logprob": -0.6254883,
"special": false,
"text": "2"
},
{
"id": 29871,
"id": 4433,
"logprob": 0.0,
"special": false,
"text": " "
"text": " asked"
},
{
"id": 30166,
"logprob": 0.0,
"id": 1784,
"logprob": -1.1367188,
"special": false,
"text": ""
"text": " many"
},
{
"id": 30166,
"id": 3064,
"logprob": 0.0,
"special": false,
"text": ""
"text": " times"
},
{
"id": 30166,
"logprob": 0.0,
"id": 322,
"logprob": -1.7460938,
"special": false,
"text": ""
"text": " and"
},
{
"id": 30166,
"id": 306,
"logprob": 0.0,
"special": false,
"text": ""
"text": " I"
}
],
"top_tokens": null
},
"generated_text": "Test request1\n_2 "
"generated_text": "What is Deep Learning?\nThis question has been asked many times and I"
}

View File

@ -11,82 +11,97 @@
"text": "<s>"
},
{
"id": 4321,
"logprob": -8.515625,
"text": "Test"
"id": 1724,
"logprob": -7.703125,
"text": "What"
},
{
"id": 2009,
"logprob": -15.4140625,
"text": "request"
"id": 338,
"logprob": -1.4765625,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8652344,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{
"id": 13,
"logprob": -2.2617188,
"logprob": -1.9306641,
"special": false,
"text": "\n"
},
{
"id": 30166,
"logprob": -3.8671875,
"id": 5618,
"logprob": -2.4550781,
"special": false,
"text": ""
"text": "What"
},
{
"id": 30166,
"logprob": -1.0498047,
"id": 338,
"logprob": -0.5732422,
"special": false,
"text": ""
"text": " is"
},
{
"id": 30166,
"logprob": -0.24523926,
"id": 278,
"logprob": -1.5761719,
"special": false,
"text": ""
"text": " the"
},
{
"id": 30166,
"logprob": -0.07897949,
"id": 4328,
"logprob": -1.5888672,
"special": false,
"text": ""
"text": " difference"
},
{
"id": 30166,
"logprob": -0.023529053,
"id": 1546,
"logprob": -0.026504517,
"special": false,
"text": ""
"text": " between"
},
{
"id": 30166,
"logprob": -0.011444092,
"id": 21784,
"logprob": -1.4287109,
"special": false,
"text": ""
"text": " Deep"
},
{
"id": 30166,
"logprob": -0.008300781,
"id": 29257,
"logprob": -0.15856934,
"special": false,
"text": ""
"text": " Learning"
},
{
"id": 30166,
"logprob": -0.007648468,
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": ""
"text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "1\n"
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
@ -100,82 +115,97 @@
"text": "<s>"
},
{
"id": 4321,
"logprob": -8.515625,
"text": "Test"
"id": 1724,
"logprob": -7.703125,
"text": "What"
},
{
"id": 2009,
"logprob": -15.4140625,
"text": "request"
"id": 338,
"logprob": -1.4765625,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8583984,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{
"id": 13,
"logprob": -2.2617188,
"logprob": -1.9306641,
"special": false,
"text": "\n"
},
{
"id": 30166,
"logprob": -3.8671875,
"id": 5618,
"logprob": -2.4550781,
"special": false,
"text": ""
"text": "What"
},
{
"id": 30166,
"logprob": -1.0498047,
"id": 338,
"logprob": -0.5732422,
"special": false,
"text": ""
"text": " is"
},
{
"id": 30166,
"logprob": -0.24523926,
"id": 278,
"logprob": -1.5761719,
"special": false,
"text": ""
"text": " the"
},
{
"id": 30166,
"logprob": -0.07897949,
"id": 4328,
"logprob": -1.5888672,
"special": false,
"text": ""
"text": " difference"
},
{
"id": 30166,
"logprob": -0.023529053,
"id": 1546,
"logprob": -0.026504517,
"special": false,
"text": ""
"text": " between"
},
{
"id": 30166,
"logprob": -0.011444092,
"id": 21784,
"logprob": -1.4287109,
"special": false,
"text": ""
"text": " Deep"
},
{
"id": 30166,
"logprob": -0.008300781,
"id": 29257,
"logprob": -0.15856934,
"special": false,
"text": ""
"text": " Learning"
},
{
"id": 30166,
"logprob": -0.007648468,
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": ""
"text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "1\n"
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
@ -189,82 +219,97 @@
"text": "<s>"
},
{
"id": 4321,
"logprob": -8.515625,
"text": "Test"
"id": 1724,
"logprob": -7.703125,
"text": "What"
},
{
"id": 2009,
"logprob": -15.4140625,
"text": "request"
"id": 338,
"logprob": -1.4765625,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8652344,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{
"id": 13,
"logprob": -2.2617188,
"logprob": -1.9306641,
"special": false,
"text": "\n"
},
{
"id": 30166,
"logprob": -3.8671875,
"id": 5618,
"logprob": -2.4550781,
"special": false,
"text": ""
"text": "What"
},
{
"id": 30166,
"logprob": -1.0498047,
"id": 338,
"logprob": -0.5732422,
"special": false,
"text": ""
"text": " is"
},
{
"id": 30166,
"logprob": -0.24523926,
"id": 278,
"logprob": -1.5761719,
"special": false,
"text": ""
"text": " the"
},
{
"id": 30166,
"logprob": -0.07897949,
"id": 4328,
"logprob": -1.5888672,
"special": false,
"text": ""
"text": " difference"
},
{
"id": 30166,
"logprob": -0.023529053,
"id": 1546,
"logprob": -0.026504517,
"special": false,
"text": ""
"text": " between"
},
{
"id": 30166,
"logprob": -0.011444092,
"id": 21784,
"logprob": -1.4287109,
"special": false,
"text": ""
"text": " Deep"
},
{
"id": 30166,
"logprob": -0.008300781,
"id": 29257,
"logprob": -0.15856934,
"special": false,
"text": ""
"text": " Learning"
},
{
"id": 30166,
"logprob": -0.007648468,
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": ""
"text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "1\n"
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
@ -278,81 +323,96 @@
"text": "<s>"
},
{
"id": 4321,
"logprob": -8.515625,
"text": "Test"
"id": 1724,
"logprob": -7.703125,
"text": "What"
},
{
"id": 2009,
"logprob": -15.4140625,
"text": "request"
"id": 338,
"logprob": -1.4765625,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8652344,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{
"id": 13,
"logprob": -2.2617188,
"logprob": -1.9306641,
"special": false,
"text": "\n"
},
{
"id": 30166,
"logprob": -3.8671875,
"id": 5618,
"logprob": -2.4550781,
"special": false,
"text": ""
"text": "What"
},
{
"id": 30166,
"logprob": -1.0498047,
"id": 338,
"logprob": -0.5732422,
"special": false,
"text": ""
"text": " is"
},
{
"id": 30166,
"logprob": -0.24523926,
"id": 278,
"logprob": -1.5761719,
"special": false,
"text": ""
"text": " the"
},
{
"id": 30166,
"logprob": -0.07897949,
"id": 4328,
"logprob": -1.5888672,
"special": false,
"text": ""
"text": " difference"
},
{
"id": 30166,
"logprob": -0.023529053,
"id": 1546,
"logprob": -0.026504517,
"special": false,
"text": ""
"text": " between"
},
{
"id": 30166,
"logprob": -0.011444092,
"id": 21784,
"logprob": -1.4287109,
"special": false,
"text": ""
"text": " Deep"
},
{
"id": 30166,
"logprob": -0.008300781,
"id": 29257,
"logprob": -0.15856934,
"special": false,
"text": ""
"text": " Learning"
},
{
"id": 30166,
"logprob": -0.007648468,
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": ""
"text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "1\n"
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
}
]

View File

@ -0,0 +1,418 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
}
]

View File

@ -0,0 +1,104 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5927734,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.026428223,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4267578,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.16015625,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17382812,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
}

View File

@ -3,7 +3,7 @@ import pytest
@pytest.fixture(scope="module")
def flash_llama_awq_handle(launcher):
with launcher("abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq", num_shard=2, quantize="awq") as handle:
with launcher("abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq", num_shard=1, quantize="awq") as handle:
yield handle
@ -12,23 +12,24 @@ async def flash_llama_awq(flash_llama_awq_handle):
await flash_llama_awq_handle.health(300)
return flash_llama_awq_handle.client
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
response = await flash_llama_awq.generate(
"Test request", max_new_tokens=10, decoder_input_details=True
"What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
)
assert response.details.generated_tokens == 10
assert response.generated_text == "\nWhat is the difference between Deep Learning and Machine"
assert response == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
response = await flash_llama_awq.generate(
"Test request",
"What is Deep Learning?",
max_new_tokens=10,
repetition_penalty=1.2,
return_full_text=True,
@ -52,10 +53,12 @@ async def test_flash_llama_awq_load(
flash_llama_awq, generate_load, response_snapshot
):
responses = await generate_load(
flash_llama_awq, "Test request", max_new_tokens=10, n=4
flash_llama_awq, "What is Deep Learning?", max_new_tokens=10, n=4
)
assert len(responses) == 4
assert all([r.generated_text == responses[0].generated_text for r in responses])
assert all([r.generated_text == "\nWhat is the difference between Deep Learning and Machine" for r in responses])
assert responses == response_snapshot

View File

@ -0,0 +1,36 @@
import pytest
@pytest.fixture(scope="module")
def flash_llama_awq_handle_sharded(launcher):
    """Launch the AWQ-quantized CodeLlama checkpoint split over two shards.

    Yields the handle produced by ``launcher``. The launcher context stays
    open for as long as this module-scoped fixture is alive.
    """
    model_id = "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq"
    with launcher(model_id, num_shard=2, quantize="awq") as server_handle:
        yield server_handle
@pytest.fixture(scope="module")
async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
    """Wait for the sharded server's health check, then return its client."""
    server_handle = flash_llama_awq_handle_sharded
    # NOTE(review): 300 is passed straight to health(); presumably a timeout
    # in seconds -- confirm against the launcher handle implementation.
    await server_handle.health(300)
    return server_handle.client
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
    """Generate ten tokens on the sharded server and compare to the snapshot."""
    prompt = "What is Deep Learning?"
    expected_text = "\nWhat is the difference between Deep Learning and Machine"

    response = await flash_llama_awq_sharded.generate(
        prompt,
        max_new_tokens=10,
        decoder_input_details=True,
    )

    # Check the token count and text explicitly before the full snapshot
    # comparison, so a mismatch is reported on the narrowest assertion first.
    assert response.details.generated_tokens == 10
    assert response.generated_text == expected_text
    assert response == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_load_sharded(
    flash_llama_awq_sharded, generate_load, response_snapshot
):
    """Send the same prompt four times through ``generate_load`` and check
    every response against the expected text and the stored snapshot."""
    expected_text = "\nWhat is the difference between Deep Learning and Machine"

    responses = await generate_load(
        flash_llama_awq_sharded,
        "What is Deep Learning?",
        max_new_tokens=10,
        n=4,
    )

    assert len(responses) == 4
    # Every response must carry exactly the expected completion.
    assert all(r.generated_text == expected_text for r in responses)
    assert responses == response_snapshot

View File

@ -299,8 +299,8 @@ class Weights:
"Cannot load `awq` weight, make sure the model is already quantized"
)
qzeros = self.get_tensor(f"{prefix}.qzeros")
scales = self.get_tensor(f"{prefix}.scales")
qzeros = self.get_sharded(f"{prefix}.qzeros", dim=0)
scales = self.get_sharded(f"{prefix}.scales", dim=0)
g_idx = None
use_exllama = False