Fix and test sharded version.

This commit is contained in:
Nicolas Patry 2023-09-25 10:21:46 +00:00
parent cbf047b4ae
commit 97292ec21c
8 changed files with 881 additions and 235 deletions

View File

@ -10,80 +10,95 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 4321, "id": 1724,
"logprob": -8.515625, "logprob": -7.703125,
"text": "Test" "text": "What"
}, },
{ {
"id": 2009, "id": 338,
"logprob": -15.4140625, "logprob": -1.4765625,
"text": "request" "text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8583984,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{ {
"id": 13, "id": 13,
"logprob": -2.2597656, "logprob": -1.9306641,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 30166, "id": 5618,
"logprob": -3.8671875, "logprob": -2.4550781,
"special": false, "special": false,
"text": "" "text": "What"
}, },
{ {
"id": 30166, "id": 338,
"logprob": -1.0488281, "logprob": -0.5732422,
"special": false, "special": false,
"text": "" "text": " is"
}, },
{ {
"id": 30166, "id": 278,
"logprob": -0.24523926, "logprob": -1.5761719,
"special": false, "special": false,
"text": "" "text": " the"
}, },
{ {
"id": 30166, "id": 4328,
"logprob": -0.07897949, "logprob": -1.5888672,
"special": false, "special": false,
"text": "" "text": " difference"
}, },
{ {
"id": 30166, "id": 1546,
"logprob": -0.023513794, "logprob": -0.026504517,
"special": false, "special": false,
"text": "" "text": " between"
}, },
{ {
"id": 30166, "id": 21784,
"logprob": -0.011444092, "logprob": -1.4287109,
"special": false, "special": false,
"text": "" "text": " Deep"
}, },
{ {
"id": 30166, "id": 29257,
"logprob": -0.008430481, "logprob": -0.15856934,
"special": false, "special": false,
"text": "" "text": " Learning"
}, },
{ {
"id": 30166, "id": 322,
"logprob": -0.007648468, "logprob": -0.17456055,
"special": false, "special": false,
"text": "" "text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "1\n" "generated_text": "\nWhat is the difference between Deep Learning and Machine"
} }

View File

@ -10,80 +10,90 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 4321, "id": 338,
"logprob": -8.515625, "logprob": -9.0859375,
"text": "Test" "text": "is"
}, },
{ {
"id": 2009, "id": 21784,
"logprob": -15.4140625, "logprob": -10.90625,
"text": "request" "text": "Deep"
},
{
"id": 29257,
"logprob": -2.65625,
"text": "Learning"
},
{
"id": 29973,
"logprob": -4.8085938,
"text": "?"
} }
], ],
"seed": 0, "seed": 0,
"tokens": [ "tokens": [
{
"id": 29896,
"logprob": 0.0,
"special": false,
"text": "1"
},
{ {
"id": 13, "id": 13,
"logprob": -0.6254883, "logprob": -0.19958496,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 30166, "id": 4013,
"logprob": -2.203125,
"special": false,
"text": "This"
},
{
"id": 1139,
"logprob": -0.23693848,
"special": false,
"text": " question"
},
{
"id": 756,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": "" "text": " has"
}, },
{ {
"id": 29918, "id": 1063,
"logprob": -0.20141602, "logprob": -0.076538086,
"special": false, "special": false,
"text": "_" "text": " been"
}, },
{ {
"id": 29906, "id": 4433,
"logprob": -0.6254883,
"special": false,
"text": "2"
},
{
"id": 29871,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": " " "text": " asked"
}, },
{ {
"id": 30166, "id": 1784,
"logprob": 0.0, "logprob": -1.1367188,
"special": false, "special": false,
"text": "" "text": " many"
}, },
{ {
"id": 30166, "id": 3064,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": "" "text": " times"
}, },
{ {
"id": 30166, "id": 322,
"logprob": 0.0, "logprob": -1.7460938,
"special": false, "special": false,
"text": "" "text": " and"
}, },
{ {
"id": 30166, "id": 306,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": "" "text": " I"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "Test request1\n_2 " "generated_text": "What is Deep Learning?\nThis question has been asked many times and I"
} }

View File

@ -11,82 +11,97 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 4321, "id": 1724,
"logprob": -8.515625, "logprob": -7.703125,
"text": "Test" "text": "What"
}, },
{ {
"id": 2009, "id": 338,
"logprob": -15.4140625, "logprob": -1.4765625,
"text": "request" "text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8652344,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{ {
"id": 13, "id": 13,
"logprob": -2.2617188, "logprob": -1.9306641,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 30166, "id": 5618,
"logprob": -3.8671875, "logprob": -2.4550781,
"special": false, "special": false,
"text": "" "text": "What"
}, },
{ {
"id": 30166, "id": 338,
"logprob": -1.0498047, "logprob": -0.5732422,
"special": false, "special": false,
"text": "" "text": " is"
}, },
{ {
"id": 30166, "id": 278,
"logprob": -0.24523926, "logprob": -1.5761719,
"special": false, "special": false,
"text": "" "text": " the"
}, },
{ {
"id": 30166, "id": 4328,
"logprob": -0.07897949, "logprob": -1.5888672,
"special": false, "special": false,
"text": "" "text": " difference"
}, },
{ {
"id": 30166, "id": 1546,
"logprob": -0.023529053, "logprob": -0.026504517,
"special": false, "special": false,
"text": "" "text": " between"
}, },
{ {
"id": 30166, "id": 21784,
"logprob": -0.011444092, "logprob": -1.4287109,
"special": false, "special": false,
"text": "" "text": " Deep"
}, },
{ {
"id": 30166, "id": 29257,
"logprob": -0.008300781, "logprob": -0.15856934,
"special": false, "special": false,
"text": "" "text": " Learning"
}, },
{ {
"id": 30166, "id": 322,
"logprob": -0.007648468, "logprob": -0.17456055,
"special": false, "special": false,
"text": "" "text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "1\n" "generated_text": "\nWhat is the difference between Deep Learning and Machine"
}, },
{ {
"details": { "details": {
@ -100,82 +115,97 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 4321, "id": 1724,
"logprob": -8.515625, "logprob": -7.703125,
"text": "Test" "text": "What"
}, },
{ {
"id": 2009, "id": 338,
"logprob": -15.4140625, "logprob": -1.4765625,
"text": "request" "text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8583984,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{ {
"id": 13, "id": 13,
"logprob": -2.2617188, "logprob": -1.9306641,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 30166, "id": 5618,
"logprob": -3.8671875, "logprob": -2.4550781,
"special": false, "special": false,
"text": "" "text": "What"
}, },
{ {
"id": 30166, "id": 338,
"logprob": -1.0498047, "logprob": -0.5732422,
"special": false, "special": false,
"text": "" "text": " is"
}, },
{ {
"id": 30166, "id": 278,
"logprob": -0.24523926, "logprob": -1.5761719,
"special": false, "special": false,
"text": "" "text": " the"
}, },
{ {
"id": 30166, "id": 4328,
"logprob": -0.07897949, "logprob": -1.5888672,
"special": false, "special": false,
"text": "" "text": " difference"
}, },
{ {
"id": 30166, "id": 1546,
"logprob": -0.023529053, "logprob": -0.026504517,
"special": false, "special": false,
"text": "" "text": " between"
}, },
{ {
"id": 30166, "id": 21784,
"logprob": -0.011444092, "logprob": -1.4287109,
"special": false, "special": false,
"text": "" "text": " Deep"
}, },
{ {
"id": 30166, "id": 29257,
"logprob": -0.008300781, "logprob": -0.15856934,
"special": false, "special": false,
"text": "" "text": " Learning"
}, },
{ {
"id": 30166, "id": 322,
"logprob": -0.007648468, "logprob": -0.17456055,
"special": false, "special": false,
"text": "" "text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "1\n" "generated_text": "\nWhat is the difference between Deep Learning and Machine"
}, },
{ {
"details": { "details": {
@ -189,82 +219,97 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 4321, "id": 1724,
"logprob": -8.515625, "logprob": -7.703125,
"text": "Test" "text": "What"
}, },
{ {
"id": 2009, "id": 338,
"logprob": -15.4140625, "logprob": -1.4765625,
"text": "request" "text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8652344,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{ {
"id": 13, "id": 13,
"logprob": -2.2617188, "logprob": -1.9306641,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 30166, "id": 5618,
"logprob": -3.8671875, "logprob": -2.4550781,
"special": false, "special": false,
"text": "" "text": "What"
}, },
{ {
"id": 30166, "id": 338,
"logprob": -1.0498047, "logprob": -0.5732422,
"special": false, "special": false,
"text": "" "text": " is"
}, },
{ {
"id": 30166, "id": 278,
"logprob": -0.24523926, "logprob": -1.5761719,
"special": false, "special": false,
"text": "" "text": " the"
}, },
{ {
"id": 30166, "id": 4328,
"logprob": -0.07897949, "logprob": -1.5888672,
"special": false, "special": false,
"text": "" "text": " difference"
}, },
{ {
"id": 30166, "id": 1546,
"logprob": -0.023529053, "logprob": -0.026504517,
"special": false, "special": false,
"text": "" "text": " between"
}, },
{ {
"id": 30166, "id": 21784,
"logprob": -0.011444092, "logprob": -1.4287109,
"special": false, "special": false,
"text": "" "text": " Deep"
}, },
{ {
"id": 30166, "id": 29257,
"logprob": -0.008300781, "logprob": -0.15856934,
"special": false, "special": false,
"text": "" "text": " Learning"
}, },
{ {
"id": 30166, "id": 322,
"logprob": -0.007648468, "logprob": -0.17456055,
"special": false, "special": false,
"text": "" "text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "1\n" "generated_text": "\nWhat is the difference between Deep Learning and Machine"
}, },
{ {
"details": { "details": {
@ -278,81 +323,96 @@
"text": "<s>" "text": "<s>"
}, },
{ {
"id": 4321, "id": 1724,
"logprob": -8.515625, "logprob": -7.703125,
"text": "Test" "text": "What"
}, },
{ {
"id": 2009, "id": 338,
"logprob": -15.4140625, "logprob": -1.4765625,
"text": "request" "text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8652344,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7548828,
"text": "?"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{
"id": 29896,
"logprob": -2.0292969,
"special": false,
"text": "1"
},
{ {
"id": 13, "id": 13,
"logprob": -2.2617188, "logprob": -1.9306641,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 30166, "id": 5618,
"logprob": -3.8671875, "logprob": -2.4550781,
"special": false, "special": false,
"text": "" "text": "What"
}, },
{ {
"id": 30166, "id": 338,
"logprob": -1.0498047, "logprob": -0.5732422,
"special": false, "special": false,
"text": "" "text": " is"
}, },
{ {
"id": 30166, "id": 278,
"logprob": -0.24523926, "logprob": -1.5761719,
"special": false, "special": false,
"text": "" "text": " the"
}, },
{ {
"id": 30166, "id": 4328,
"logprob": -0.07897949, "logprob": -1.5888672,
"special": false, "special": false,
"text": "" "text": " difference"
}, },
{ {
"id": 30166, "id": 1546,
"logprob": -0.023529053, "logprob": -0.026504517,
"special": false, "special": false,
"text": "" "text": " between"
}, },
{ {
"id": 30166, "id": 21784,
"logprob": -0.011444092, "logprob": -1.4287109,
"special": false, "special": false,
"text": "" "text": " Deep"
}, },
{ {
"id": 30166, "id": 29257,
"logprob": -0.008300781, "logprob": -0.15856934,
"special": false, "special": false,
"text": "" "text": " Learning"
}, },
{ {
"id": 30166, "id": 322,
"logprob": -0.007648468, "logprob": -0.17456055,
"special": false, "special": false,
"text": "" "text": " and"
},
{
"id": 6189,
"logprob": -0.62646484,
"special": false,
"text": " Machine"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "1\n" "generated_text": "\nWhat is the difference between Deep Learning and Machine"
} }
] ]

View File

@ -0,0 +1,418 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5859375,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.02633667,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4335938,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.15991211,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17456055,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
}
]

View File

@ -0,0 +1,104 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -7.6914062,
"text": "What"
},
{
"id": 338,
"logprob": -1.4746094,
"text": "is"
},
{
"id": 21784,
"logprob": -9.390625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.8623047,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.7558594,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.9228516,
"special": false,
"text": "\n"
},
{
"id": 5618,
"logprob": -2.4609375,
"special": false,
"text": "What"
},
{
"id": 338,
"logprob": -0.57177734,
"special": false,
"text": " is"
},
{
"id": 278,
"logprob": -1.5722656,
"special": false,
"text": " the"
},
{
"id": 4328,
"logprob": -1.5927734,
"special": false,
"text": " difference"
},
{
"id": 1546,
"logprob": -0.026428223,
"special": false,
"text": " between"
},
{
"id": 21784,
"logprob": -1.4267578,
"special": false,
"text": " Deep"
},
{
"id": 29257,
"logprob": -0.16015625,
"special": false,
"text": " Learning"
},
{
"id": 322,
"logprob": -0.17382812,
"special": false,
"text": " and"
},
{
"id": 6189,
"logprob": -0.62060547,
"special": false,
"text": " Machine"
}
],
"top_tokens": null
},
"generated_text": "\nWhat is the difference between Deep Learning and Machine"
}

View File

@ -3,7 +3,7 @@ import pytest
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def flash_llama_awq_handle(launcher): def flash_llama_awq_handle(launcher):
with launcher("abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq", num_shard=2, quantize="awq") as handle: with launcher("abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq", num_shard=1, quantize="awq") as handle:
yield handle yield handle
@ -12,23 +12,24 @@ async def flash_llama_awq(flash_llama_awq_handle):
await flash_llama_awq_handle.health(300) await flash_llama_awq_handle.health(300)
return flash_llama_awq_handle.client return flash_llama_awq_handle.client
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.private @pytest.mark.private
async def test_flash_llama_awq(flash_llama_awq, response_snapshot): async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
response = await flash_llama_awq.generate( response = await flash_llama_awq.generate(
"Test request", max_new_tokens=10, decoder_input_details=True "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10
assert response.generated_text == "\nWhat is the difference between Deep Learning and Machine"
assert response == response_snapshot assert response == response_snapshot
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.private @pytest.mark.private
async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot): async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
response = await flash_llama_awq.generate( response = await flash_llama_awq.generate(
"Test request", "What is Deep Learning?",
max_new_tokens=10, max_new_tokens=10,
repetition_penalty=1.2, repetition_penalty=1.2,
return_full_text=True, return_full_text=True,
@ -52,10 +53,12 @@ async def test_flash_llama_awq_load(
flash_llama_awq, generate_load, response_snapshot flash_llama_awq, generate_load, response_snapshot
): ):
responses = await generate_load( responses = await generate_load(
flash_llama_awq, "Test request", max_new_tokens=10, n=4 flash_llama_awq, "What is Deep Learning?", max_new_tokens=10, n=4
) )
assert len(responses) == 4 assert len(responses) == 4
assert all([r.generated_text == responses[0].generated_text for r in responses]) assert all([r.generated_text == "\nWhat is the difference between Deep Learning and Machine" for r in responses])
assert responses == response_snapshot assert responses == response_snapshot

View File

@ -0,0 +1,36 @@
import pytest
@pytest.fixture(scope="module")
def flash_llama_awq_handle_sharded(launcher):
    """Launch the AWQ CodeLlama checkpoint with two shards for this module.

    Yields the handle produced by entering the ``launcher`` context manager;
    the context is exited when the fixture is torn down.
    """
    model_id = "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq"
    launch_options = {"num_shard": 2, "quantize": "awq"}
    with launcher(model_id, **launch_options) as server_handle:
        yield server_handle
@pytest.fixture(scope="module")
async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
    """Await the sharded server's health check, then return its client."""
    server_handle = flash_llama_awq_handle_sharded
    # NOTE(review): 300 is presumably a timeout in seconds -- confirm
    # against the handle's ``health`` signature.
    await server_handle.health(300)
    client = server_handle.client
    return client
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
    """Generate ten tokens on the sharded model and compare with the snapshot."""
    prompt = "What is Deep Learning?"
    expected_text = "\nWhat is the difference between Deep Learning and Machine"

    response = await flash_llama_awq_sharded.generate(
        prompt,
        max_new_tokens=10,
        decoder_input_details=True,
    )

    # Check the token count and the exact text before the full snapshot
    # comparison, so a mismatch there is reported first.
    assert response.details.generated_tokens == 10
    assert response.generated_text == expected_text
    assert response == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_load_sharded(
    flash_llama_awq_sharded, generate_load, response_snapshot
):
    """Issue four identical requests and check each returns the expected text."""
    prompt = "What is Deep Learning?"
    expected_text = "\nWhat is the difference between Deep Learning and Machine"

    responses = await generate_load(
        flash_llama_awq_sharded,
        prompt,
        max_new_tokens=10,
        n=4,
    )

    assert len(responses) == 4
    # Collect every text up front, then require each one to match exactly.
    generated_texts = [item.generated_text for item in responses]
    assert all(text == expected_text for text in generated_texts)
    assert responses == response_snapshot

View File

@ -299,8 +299,8 @@ class Weights:
"Cannot load `awq` weight, make sure the model is already quantized" "Cannot load `awq` weight, make sure the model is already quantized"
) )
qzeros = self.get_tensor(f"{prefix}.qzeros") qzeros = self.get_sharded(f"{prefix}.qzeros", dim=0)
scales = self.get_tensor(f"{prefix}.scales") scales = self.get_sharded(f"{prefix}.scales", dim=0)
g_idx = None g_idx = None
use_exllama = False use_exllama = False