mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 13:52:07 +00:00
Bug Fix: Sliding Window Attention (#3112)
* (fix) sliding window attention * (fix) flashinfer * (typo) collection link * Add window_size_left param ipex rocm * Update window size rocm flash decoding * fix: bump snapshots and improve exceed window test case * feat: add tests for image types and remove alpha from png * Upgrading `from_env` to get token from file when necessary + fix pali_gemma. * fix: add pillow dependency and bump lock+requirements * fix: bump org name in gemma3 test * Fix qwen2. --------- Co-authored-by: drbh <david.richard.holtz@gmail.com> Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent
8c2c348f3c
commit
a35fbdb925
@ -14,8 +14,8 @@ Text Generation Inference enables serving optimized models. The following sectio
|
||||
- [Gemma](https://huggingface.co/google/gemma-7b)
|
||||
- [PaliGemma](https://huggingface.co/google/paligemma-3b-pt-224)
|
||||
- [Gemma2](https://huggingface.co/collections/google/gemma-2-release-667d6600fd5220e7b967f315)
|
||||
- [Gemma3](https://huggingface.co/collections/google/gemma-3)
|
||||
- [Gemma3 Text](https://huggingface.co/collections/google/gemma-3)
|
||||
- [Gemma3](https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d)
|
||||
- [Gemma3 Text](https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d)
|
||||
- [Cohere](https://huggingface.co/CohereForAI/c4ai-command-r-plus)
|
||||
- [Dbrx](https://huggingface.co/databricks/dbrx-instruct)
|
||||
- [Mamba](https://huggingface.co/state-spaces/mamba-2.8b-slimpj)
|
||||
|
@ -1,133 +1,109 @@
|
||||
{
|
||||
"details": {
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 20,
|
||||
"finish_reason": "eos_token",
|
||||
"generated_tokens": 16,
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -1.3984375,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 1331,
|
||||
"logprob": -1.6953125,
|
||||
"special": false,
|
||||
"text": " people"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.44726562,
|
||||
"logprob": -0.23535156,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.011413574,
|
||||
"id": 532,
|
||||
"logprob": -0.24316406,
|
||||
"special": false,
|
||||
"text": " "
|
||||
"text": " and"
|
||||
},
|
||||
{
|
||||
"id": 236812,
|
||||
"logprob": -0.09814453,
|
||||
"id": 506,
|
||||
"logprob": -0.12109375,
|
||||
"special": false,
|
||||
"text": "4"
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.044189453,
|
||||
"id": 2780,
|
||||
"logprob": -1.1640625,
|
||||
"special": false,
|
||||
"text": ","
|
||||
"text": " food"
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.15625,
|
||||
"id": 236761,
|
||||
"logprob": -0.21386719,
|
||||
"special": false,
|
||||
"text": " "
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 236810,
|
||||
"logprob": -0.010864258,
|
||||
"id": 108,
|
||||
"logprob": -0.64453125,
|
||||
"special": false,
|
||||
"text": "5"
|
||||
"text": "\n\n"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.040039062,
|
||||
"id": 2094,
|
||||
"logprob": -0.77734375,
|
||||
"special": false,
|
||||
"text": ","
|
||||
"text": "This"
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.26757812,
|
||||
"id": 563,
|
||||
"logprob": -0.040283203,
|
||||
"special": false,
|
||||
"text": " "
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 236825,
|
||||
"logprob": -0.0047302246,
|
||||
"id": 496,
|
||||
"logprob": -0.03125,
|
||||
"special": false,
|
||||
"text": "6"
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.026123047,
|
||||
"id": 6290,
|
||||
"logprob": -0.03515625,
|
||||
"special": false,
|
||||
"text": ","
|
||||
"text": " nice"
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.265625,
|
||||
"id": 1977,
|
||||
"logprob": -0.0020751953,
|
||||
"special": false,
|
||||
"text": " "
|
||||
"text": " place"
|
||||
},
|
||||
{
|
||||
"id": 236832,
|
||||
"logprob": -0.014160156,
|
||||
"id": 236761,
|
||||
"logprob": -0.0079956055,
|
||||
"special": false,
|
||||
"text": "7"
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.013977051,
|
||||
"id": 107,
|
||||
"logprob": -0.9921875,
|
||||
"special": false,
|
||||
"text": ","
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.103515625,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 236828,
|
||||
"logprob": -0.008178711,
|
||||
"special": false,
|
||||
"text": "8"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.030151367,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.39453125,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 236819,
|
||||
"logprob": -0.008728027,
|
||||
"special": false,
|
||||
"text": "9"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.020629883,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.08154297,
|
||||
"special": false,
|
||||
"text": " "
|
||||
"id": 106,
|
||||
"logprob": -0.45507812,
|
||||
"special": true,
|
||||
"text": "<end_of_turn>"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": ", 4, 5, 6, 7, 8, 9, "
|
||||
"generated_text": " the people, and the food.\n\nThis is a nice place.\n"
|
||||
}
|
||||
|
@ -8,31 +8,31 @@
|
||||
"tokens": [
|
||||
{
|
||||
"id": 1331,
|
||||
"logprob": -0.32421875,
|
||||
"logprob": -0.34960938,
|
||||
"special": false,
|
||||
"text": " people"
|
||||
},
|
||||
{
|
||||
"id": 8390,
|
||||
"logprob": -0.15332031,
|
||||
"logprob": -0.14746094,
|
||||
"special": false,
|
||||
"text": " died"
|
||||
},
|
||||
{
|
||||
"id": 528,
|
||||
"logprob": -1.140625,
|
||||
"logprob": -1.2265625,
|
||||
"special": false,
|
||||
"text": " in"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.42578125,
|
||||
"logprob": -0.47070312,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 3640,
|
||||
"logprob": -0.64453125,
|
||||
"logprob": -0.5859375,
|
||||
"special": false,
|
||||
"text": " United"
|
||||
},
|
||||
@ -44,31 +44,31 @@
|
||||
},
|
||||
{
|
||||
"id": 236761,
|
||||
"logprob": -0.37890625,
|
||||
"logprob": -0.34765625,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -0.08300781,
|
||||
"logprob": -0.0859375,
|
||||
"special": false,
|
||||
"text": "\n\n"
|
||||
},
|
||||
{
|
||||
"id": 818,
|
||||
"logprob": -1.1796875,
|
||||
"logprob": -1.1640625,
|
||||
"special": false,
|
||||
"text": "The"
|
||||
},
|
||||
{
|
||||
"id": 6816,
|
||||
"logprob": -1.765625,
|
||||
"logprob": -1.890625,
|
||||
"special": false,
|
||||
"text": " generally"
|
||||
},
|
||||
{
|
||||
"id": 10951,
|
||||
"logprob": -0.14550781,
|
||||
"logprob": -0.14648438,
|
||||
"special": false,
|
||||
"text": " accepted"
|
||||
},
|
||||
@ -86,49 +86,49 @@
|
||||
},
|
||||
{
|
||||
"id": 600,
|
||||
"logprob": -0.65625,
|
||||
"logprob": -0.65234375,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -1.1796875,
|
||||
"logprob": -1.2109375,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 236825,
|
||||
"logprob": -0.0009918213,
|
||||
"logprob": -0.00088119507,
|
||||
"special": false,
|
||||
"text": "6"
|
||||
},
|
||||
{
|
||||
"id": 236832,
|
||||
"logprob": -6.532669e-05,
|
||||
"logprob": -6.580353e-05,
|
||||
"special": false,
|
||||
"text": "7"
|
||||
},
|
||||
{
|
||||
"id": 236810,
|
||||
"logprob": -4.863739e-05,
|
||||
"logprob": -5.2690506e-05,
|
||||
"special": false,
|
||||
"text": "5"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.00017929077,
|
||||
"logprob": -0.0001745224,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 236771,
|
||||
"logprob": -1.2397766e-05,
|
||||
"logprob": -1.180172e-05,
|
||||
"special": false,
|
||||
"text": "0"
|
||||
},
|
||||
{
|
||||
"id": 236771,
|
||||
"logprob": -2.1457672e-06,
|
||||
"logprob": -1.7881393e-06,
|
||||
"special": false,
|
||||
"text": "0"
|
||||
},
|
||||
@ -140,7 +140,7 @@
|
||||
},
|
||||
{
|
||||
"id": 1331,
|
||||
"logprob": -0.50390625,
|
||||
"logprob": -0.44921875,
|
||||
"special": false,
|
||||
"text": " people"
|
||||
},
|
||||
@ -152,67 +152,67 @@
|
||||
},
|
||||
{
|
||||
"id": 528,
|
||||
"logprob": -0.08496094,
|
||||
"logprob": -0.084472656,
|
||||
"special": false,
|
||||
"text": " in"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.0003299713,
|
||||
"logprob": -0.00034713745,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 3640,
|
||||
"logprob": -0.028442383,
|
||||
"logprob": -0.028564453,
|
||||
"special": false,
|
||||
"text": " United"
|
||||
},
|
||||
{
|
||||
"id": 4184,
|
||||
"logprob": -0.00011014938,
|
||||
"logprob": -0.00012207031,
|
||||
"special": false,
|
||||
"text": " States"
|
||||
},
|
||||
{
|
||||
"id": 236761,
|
||||
"logprob": -1.1796875,
|
||||
"logprob": -1.15625,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 3153,
|
||||
"logprob": -0.104003906,
|
||||
"logprob": -0.103027344,
|
||||
"special": false,
|
||||
"text": " However"
|
||||
},
|
||||
{
|
||||
"id": 236764,
|
||||
"logprob": -0.009094238,
|
||||
"logprob": -0.009155273,
|
||||
"special": false,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 1070,
|
||||
"logprob": -0.88671875,
|
||||
"logprob": -0.92578125,
|
||||
"special": false,
|
||||
"text": " some"
|
||||
},
|
||||
{
|
||||
"id": 61806,
|
||||
"logprob": -0.84765625,
|
||||
"logprob": -0.91796875,
|
||||
"special": false,
|
||||
"text": " historians"
|
||||
},
|
||||
{
|
||||
"id": 4646,
|
||||
"logprob": -1.34375,
|
||||
"logprob": -1.3828125,
|
||||
"special": false,
|
||||
"text": " believe"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.59375,
|
||||
"logprob": -0.65234375,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
@ -230,7 +230,7 @@
|
||||
},
|
||||
{
|
||||
"id": 1451,
|
||||
"logprob": -0.60546875,
|
||||
"logprob": -0.66015625,
|
||||
"special": false,
|
||||
"text": " could"
|
||||
},
|
||||
@ -242,73 +242,73 @@
|
||||
},
|
||||
{
|
||||
"id": 618,
|
||||
"logprob": -0.61328125,
|
||||
"logprob": -0.57421875,
|
||||
"special": false,
|
||||
"text": " as"
|
||||
},
|
||||
{
|
||||
"id": 1494,
|
||||
"logprob": -0.00033569336,
|
||||
"logprob": -0.00036239624,
|
||||
"special": false,
|
||||
"text": " high"
|
||||
},
|
||||
{
|
||||
"id": 618,
|
||||
"logprob": -0.0001411438,
|
||||
"logprob": -0.0001335144,
|
||||
"special": false,
|
||||
"text": " as"
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.001045227,
|
||||
"logprob": -0.0009689331,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 236770,
|
||||
"logprob": -0.21289062,
|
||||
"logprob": -0.26367188,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 236771,
|
||||
"logprob": -0.13378906,
|
||||
"logprob": -0.17773438,
|
||||
"special": false,
|
||||
"text": "0"
|
||||
},
|
||||
{
|
||||
"id": 3625,
|
||||
"logprob": -0.0087890625,
|
||||
"logprob": -0.012084961,
|
||||
"special": false,
|
||||
"text": " million"
|
||||
},
|
||||
{
|
||||
"id": 236761,
|
||||
"logprob": -0.2109375,
|
||||
"logprob": -0.21289062,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -0.39453125,
|
||||
"logprob": -0.37304688,
|
||||
"special": false,
|
||||
"text": "\n\n"
|
||||
},
|
||||
{
|
||||
"id": 236777,
|
||||
"logprob": -1.1328125,
|
||||
"logprob": -1.078125,
|
||||
"special": false,
|
||||
"text": "I"
|
||||
},
|
||||
{
|
||||
"id": 1006,
|
||||
"logprob": -1.4140625,
|
||||
"logprob": -1.3203125,
|
||||
"special": false,
|
||||
"text": " am"
|
||||
},
|
||||
{
|
||||
"id": 3182,
|
||||
"logprob": -1.15625,
|
||||
"logprob": -1.078125,
|
||||
"special": false,
|
||||
"text": " looking"
|
||||
},
|
||||
@ -320,13 +320,13 @@
|
||||
},
|
||||
{
|
||||
"id": 919,
|
||||
"logprob": -1.2734375,
|
||||
"logprob": -1.25,
|
||||
"special": false,
|
||||
"text": " more"
|
||||
},
|
||||
{
|
||||
"id": 1938,
|
||||
"logprob": -1.2265625,
|
||||
"logprob": -1.2421875,
|
||||
"special": false,
|
||||
"text": " information"
|
||||
},
|
||||
@ -338,169 +338,169 @@
|
||||
},
|
||||
{
|
||||
"id": 672,
|
||||
"logprob": -0.77734375,
|
||||
"logprob": -0.73046875,
|
||||
"special": false,
|
||||
"text": " this"
|
||||
},
|
||||
{
|
||||
"id": 59725,
|
||||
"logprob": -0.70703125,
|
||||
"logprob": -0.75,
|
||||
"special": false,
|
||||
"text": " discrepancy"
|
||||
},
|
||||
{
|
||||
"id": 532,
|
||||
"logprob": -0.8515625,
|
||||
"logprob": -0.83984375,
|
||||
"special": false,
|
||||
"text": " and"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.65625,
|
||||
"logprob": -0.7109375,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 5872,
|
||||
"logprob": -1.15625,
|
||||
"logprob": -1.2734375,
|
||||
"special": false,
|
||||
"text": " factors"
|
||||
},
|
||||
{
|
||||
"id": 600,
|
||||
"logprob": -0.2265625,
|
||||
"logprob": -0.22851562,
|
||||
"special": false,
|
||||
"text": " that"
|
||||
},
|
||||
{
|
||||
"id": 19263,
|
||||
"logprob": -1.125,
|
||||
"logprob": -1.1640625,
|
||||
"special": false,
|
||||
"text": " contributed"
|
||||
},
|
||||
{
|
||||
"id": 531,
|
||||
"logprob": -0.001083374,
|
||||
"logprob": -0.0010757446,
|
||||
"special": false,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.2109375,
|
||||
"logprob": -0.18945312,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 5777,
|
||||
"logprob": -1.21875,
|
||||
"logprob": -1.2734375,
|
||||
"special": false,
|
||||
"text": " wide"
|
||||
},
|
||||
{
|
||||
"id": 2644,
|
||||
"logprob": -0.018310547,
|
||||
"logprob": -0.01940918,
|
||||
"special": false,
|
||||
"text": " range"
|
||||
},
|
||||
{
|
||||
"id": 529,
|
||||
"logprob": -0.12988281,
|
||||
"logprob": -0.14550781,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 14287,
|
||||
"logprob": -0.03564453,
|
||||
"logprob": -0.032470703,
|
||||
"special": false,
|
||||
"text": " estimates"
|
||||
},
|
||||
{
|
||||
"id": 236761,
|
||||
"logprob": -0.010314941,
|
||||
"logprob": -0.010375977,
|
||||
"special": false,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -0.060546875,
|
||||
"logprob": -0.06591797,
|
||||
"special": false,
|
||||
"text": "\n\n"
|
||||
},
|
||||
{
|
||||
"id": 8291,
|
||||
"logprob": -0.734375,
|
||||
"logprob": -0.8046875,
|
||||
"special": false,
|
||||
"text": "Here"
|
||||
},
|
||||
{
|
||||
"id": 236789,
|
||||
"logprob": -0.26367188,
|
||||
"logprob": -0.23828125,
|
||||
"special": false,
|
||||
"text": "'"
|
||||
},
|
||||
{
|
||||
"id": 236751,
|
||||
"logprob": -1.1920929e-06,
|
||||
"logprob": -1.0728836e-06,
|
||||
"special": false,
|
||||
"text": "s"
|
||||
},
|
||||
{
|
||||
"id": 496,
|
||||
"logprob": -0.15527344,
|
||||
"logprob": -0.17480469,
|
||||
"special": false,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 25890,
|
||||
"logprob": -0.08886719,
|
||||
"logprob": -0.087402344,
|
||||
"special": false,
|
||||
"text": " breakdown"
|
||||
},
|
||||
{
|
||||
"id": 529,
|
||||
"logprob": -0.0020446777,
|
||||
"logprob": -0.0021209717,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.17871094,
|
||||
"logprob": -0.19140625,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 5872,
|
||||
"logprob": -0.90234375,
|
||||
"logprob": -1.0078125,
|
||||
"special": false,
|
||||
"text": " factors"
|
||||
},
|
||||
{
|
||||
"id": 20894,
|
||||
"logprob": -0.25976562,
|
||||
"logprob": -0.26367188,
|
||||
"special": false,
|
||||
"text": " contributing"
|
||||
},
|
||||
{
|
||||
"id": 531,
|
||||
"logprob": -8.34465e-05,
|
||||
"logprob": -9.250641e-05,
|
||||
"special": false,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.008544922,
|
||||
"logprob": -0.008666992,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 5777,
|
||||
"logprob": -0.62109375,
|
||||
"logprob": -0.6171875,
|
||||
"special": false,
|
||||
"text": " wide"
|
||||
},
|
||||
{
|
||||
"id": 2644,
|
||||
"logprob": -0.0023345947,
|
||||
"logprob": -0.0023956299,
|
||||
"special": false,
|
||||
"text": " range"
|
||||
},
|
||||
@ -512,25 +512,25 @@
|
||||
},
|
||||
{
|
||||
"id": 14287,
|
||||
"logprob": -0.011291504,
|
||||
"logprob": -0.011352539,
|
||||
"special": false,
|
||||
"text": " estimates"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.29101562,
|
||||
"logprob": -0.30664062,
|
||||
"special": false,
|
||||
"text": " for"
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -0.21484375,
|
||||
"logprob": -0.21386719,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 236743,
|
||||
"logprob": -0.2890625,
|
||||
"logprob": -0.35351562,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
@ -566,19 +566,19 @@
|
||||
},
|
||||
{
|
||||
"id": 10248,
|
||||
"logprob": -0.01953125,
|
||||
"logprob": -0.015258789,
|
||||
"special": false,
|
||||
"text": " pandemic"
|
||||
},
|
||||
{
|
||||
"id": 4355,
|
||||
"logprob": -0.78515625,
|
||||
"logprob": -0.83203125,
|
||||
"special": false,
|
||||
"text": " death"
|
||||
},
|
||||
{
|
||||
"id": 25363,
|
||||
"logprob": -6.771088e-05,
|
||||
"logprob": -7.43866e-05,
|
||||
"special": false,
|
||||
"text": " toll"
|
||||
},
|
||||
@ -590,13 +590,13 @@
|
||||
},
|
||||
{
|
||||
"id": 506,
|
||||
"logprob": -7.033348e-06,
|
||||
"logprob": -6.67572e-06,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 3640,
|
||||
"logprob": -0.0067443848,
|
||||
"logprob": -0.0059509277,
|
||||
"special": false,
|
||||
"text": " United"
|
||||
},
|
||||
|
@ -0,0 +1,26 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Okay, let's analyze the image.\n\nThe image is a solid, bright white color. There is nothing else visible within it. \n\nIt's essentially a blank white canvas or a completely white square. \n\nIs there anything specific you'd like me to do with this image, such as describe it further or imagine what it might represent?",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1741965894,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 74,
|
||||
"prompt_tokens": 277,
|
||||
"total_tokens": 351
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Okay, let's analyze the image. \n\nThe image is entirely white, with a very subtle, faint outline of a stylized, cartoonish figure. It appears to be a simplified depiction of a person, likely a child, with a wide-eyed expression and a small, rounded body. \n\nIt's almost like a minimalist, iconic representation. \n\nDo you want me to try and describe it in more detail or perhaps speculate about the context of the image?",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1741965892,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 98,
|
||||
"prompt_tokens": 277,
|
||||
"total_tokens": 375
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Okay, let's analyze the image. \n\nThe transparent image reveals a stylized depiction of **a human head**. It's a minimalist, geometric representation, showing the basic shapes of the skull, eye sockets, and head outline. \n\nDo you want me to describe any specific element of the image in more detail?",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1741966313,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 67,
|
||||
"prompt_tokens": 277,
|
||||
"total_tokens": 344
|
||||
}
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a humorous and unexpected sight of a cow enjoying a tropical beach!",
|
||||
"content": "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a day at the beach!",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
@ -13,14 +13,14 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1741703756,
|
||||
"created": 1741964480,
|
||||
"id": "",
|
||||
"model": "gg-hf-g/gemma-3-4b-it",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 70,
|
||||
"prompt_tokens": 277,
|
||||
"total_tokens": 347
|
||||
"completion_tokens": 74,
|
||||
"prompt_tokens": 275,
|
||||
"total_tokens": 349
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Based on the image, the animal is a cow, not a dog! \n\nIt appears to be a **Brazilian cattle breed** known as a **Gir Cow**. They are recognized for their reddish-brown color and distinctive markings.",
|
||||
"content": "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their reddish-brown color and distinctive white markings. \n\nIf you'd like, you can send me another image and I’ll do my best to identify it!",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
@ -13,14 +13,14 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1741703753,
|
||||
"created": 1741964477,
|
||||
"id": "",
|
||||
"model": "gg-hf-g/gemma-3-4b-it",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 48,
|
||||
"prompt_tokens": 281,
|
||||
"total_tokens": 329
|
||||
"completion_tokens": 75,
|
||||
"prompt_tokens": 279,
|
||||
"total_tokens": 354
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1,7 @@
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@ -49,9 +53,9 @@ async def test_flash_gemma3_image_cow_dog(flash_gemma3, response_snapshot):
|
||||
|
||||
assert (
|
||||
response.choices[0].message.content
|
||||
== "Based on the image, the animal is a cow, not a dog! \n\nIt appears to be a **Brazilian cattle breed** known as a **Gir Cow**. They are recognized for their reddish-brown color and distinctive markings."
|
||||
== "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their reddish-brown color and distinctive white markings. \n\nIf you'd like, you can send me another image and I’ll do my best to identify it!"
|
||||
)
|
||||
assert response.usage["completion_tokens"] == 48
|
||||
assert response.usage["completion_tokens"] == 75
|
||||
assert response == response_snapshot
|
||||
|
||||
|
||||
@ -72,19 +76,95 @@ async def test_flash_gemma3_image_cow(flash_gemma3, response_snapshot):
|
||||
)
|
||||
assert (
|
||||
response.choices[0].message.content
|
||||
== "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a humorous and unexpected sight of a cow enjoying a tropical beach!"
|
||||
== "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a day at the beach!"
|
||||
)
|
||||
assert response.usage["completion_tokens"] == 70
|
||||
assert response.usage["completion_tokens"] == 74
|
||||
assert response == response_snapshot
|
||||
|
||||
|
||||
async def test_exceed_window(flash_gemma3, response_snapshot):
|
||||
response = await flash_gemma3.generate(
|
||||
"This is a nice place. " * 800 + "Now count: 1, 2, 3",
|
||||
"This is a nice place. " * 800 + "I really enjoy the scenery,",
|
||||
seed=42,
|
||||
max_new_tokens=20,
|
||||
)
|
||||
|
||||
assert response.generated_text == ", 4, 5, 6, 7, 8, 9, "
|
||||
assert response.details.generated_tokens == 20
|
||||
assert (
|
||||
response.generated_text
|
||||
== " the people, and the food.\n\nThis is a nice place.\n"
|
||||
)
|
||||
assert response.details.generated_tokens == 16
|
||||
assert response == response_snapshot
|
||||
|
||||
|
||||
# Helper function to convert a Pillow image to a base64 data URL
|
||||
def image_to_data_url(img: Image.Image, fmt: str) -> str:
    """Encode a Pillow image as a base64 ``data:`` URL.

    Args:
        img: Image to serialize.
        fmt: Pillow format name handed to ``img.save`` (e.g. ``"PNG"``,
            ``"JPEG"``).

    Returns:
        A string of the form ``data:<mime type>;base64,<payload>``.
    """
    buffer = BytesIO()
    img.save(buffer, format=fmt)
    b64_str = base64.b64encode(buffer.getvalue()).decode("utf-8")

    # The MIME type used to be "image/png" for PNG and "image/jpeg" for
    # everything else, which mislabels any other format (GIF, WEBP, ...).
    # Look the format up in Pillow's format -> MIME registry instead and fall
    # back to "image/<format>" when the format has no registered MIME type.
    # PNG and JPEG still yield "image/png" and "image/jpeg".
    # NOTE(review): relies on the plugin for `fmt` having registered its MIME
    # type by the time save() has succeeded -- confirm for exotic formats.
    fmt_key = fmt.upper()
    mime_type = Image.MIME.get(fmt_key, f"image/{fmt_key.lower()}")
    return f"data:{mime_type};base64,{b64_str}"
|
||||
|
||||
|
||||
async def test_flash_gemma3_image_base64_rgba(flash_gemma3, response_snapshot):
    """Send a fully transparent RGBA PNG inline as a data URL and compare the
    chat response against ``response_snapshot``."""
    # 100x100 canvas where every pixel is (0, 0, 0, 0), i.e. transparent.
    transparent = Image.new("RGBA", (100, 100), (0, 0, 0, 0))
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_to_data_url(transparent, "PNG")},
    }
    text_part = {
        "type": "text",
        "text": "What do you see in this transparent image?",
    }
    user_message = {"role": "user", "content": [image_part, text_part]}

    response = await flash_gemma3.chat(
        seed=42,
        messages=[user_message],
        max_tokens=100,
    )
    assert response == response_snapshot
|
||||
|
||||
|
||||
async def test_flash_gemma3_image_base64_rgb_png(flash_gemma3, response_snapshot):
    """Send a plain white RGB PNG (no alpha channel) inline as a data URL and
    compare the chat response against ``response_snapshot``."""
    # 100x100 canvas filled with (255, 255, 255), i.e. solid white.
    white = Image.new("RGB", (100, 100), (255, 255, 255))
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_to_data_url(white, "PNG")},
    }
    text_part = {"type": "text", "text": "What do you see in this plain image?"}
    user_message = {"role": "user", "content": [image_part, text_part]}

    response = await flash_gemma3.chat(
        seed=42,
        messages=[user_message],
        max_tokens=100,
    )
    assert response == response_snapshot
|
||||
|
||||
|
||||
async def test_flash_gemma3_image_base64_rgb_jpg(flash_gemma3, response_snapshot):
    """Send a plain white RGB JPEG inline as a data URL and compare the chat
    response against ``response_snapshot``."""
    # 100x100 canvas filled with (255, 255, 255), i.e. solid white.
    white = Image.new("RGB", (100, 100), (255, 255, 255))
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_to_data_url(white, "JPEG")},
    }
    text_part = {"type": "text", "text": "What do you see in this JPEG image?"}
    user_message = {"role": "user", "content": [image_part, text_part]}

    response = await flash_gemma3.chat(
        seed=42,
        messages=[user_message],
        max_tokens=100,
    )
    assert response == response_snapshot
|
||||
|
@ -15,6 +15,7 @@ dependencies = [
|
||||
"numpy>=2.0",
|
||||
"openai>=1.65",
|
||||
"huggingface_hub>=0.29",
|
||||
"pillow>=11.1.0",
|
||||
]
|
||||
|
||||
[tool.isort]
|
||||
|
@ -1,8 +1,8 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml -o requirements.txt
|
||||
aiohappyeyeballs==2.4.6
|
||||
# uv pip compile pyproject.toml
|
||||
aiohappyeyeballs==2.6.1
|
||||
# via aiohttp
|
||||
aiohttp==3.11.12
|
||||
aiohttp==3.11.13
|
||||
# via text-generation
|
||||
aiosignal==1.3.2
|
||||
# via aiohttp
|
||||
@ -12,7 +12,7 @@ anyio==4.8.0
|
||||
# via
|
||||
# httpx
|
||||
# openai
|
||||
attrs==25.1.0
|
||||
attrs==25.3.0
|
||||
# via aiohttp
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
@ -25,13 +25,13 @@ distro==1.9.0
|
||||
# via openai
|
||||
docker==7.1.0
|
||||
# via text-generation-integration-tests (pyproject.toml)
|
||||
filelock==3.17.0
|
||||
filelock==3.18.0
|
||||
# via huggingface-hub
|
||||
frozenlist==1.5.0
|
||||
# via
|
||||
# aiohttp
|
||||
# aiosignal
|
||||
fsspec==2025.2.0
|
||||
fsspec==2025.3.0
|
||||
# via huggingface-hub
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
@ -39,7 +39,7 @@ httpcore==1.0.7
|
||||
# via httpx
|
||||
httpx==0.28.1
|
||||
# via openai
|
||||
huggingface-hub==0.29.0
|
||||
huggingface-hub==0.29.3
|
||||
# via
|
||||
# text-generation-integration-tests (pyproject.toml)
|
||||
# text-generation
|
||||
@ -51,7 +51,7 @@ idna==3.10
|
||||
# yarl
|
||||
iniconfig==2.0.0
|
||||
# via pytest
|
||||
jiter==0.8.2
|
||||
jiter==0.9.0
|
||||
# via openai
|
||||
multidict==6.1.0
|
||||
# via
|
||||
@ -59,15 +59,17 @@ multidict==6.1.0
|
||||
# yarl
|
||||
numpy==2.2.3
|
||||
# via text-generation-integration-tests (pyproject.toml)
|
||||
openai==1.65.3
|
||||
openai==1.66.3
|
||||
# via text-generation-integration-tests (pyproject.toml)
|
||||
packaging==24.2
|
||||
# via
|
||||
# huggingface-hub
|
||||
# pytest
|
||||
pillow==11.1.0
|
||||
# via text-generation-integration-tests (pyproject.toml)
|
||||
pluggy==1.5.0
|
||||
# via pytest
|
||||
propcache==0.2.1
|
||||
propcache==0.3.0
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
@ -78,7 +80,7 @@ pydantic==2.10.6
|
||||
# text-generation
|
||||
pydantic-core==2.27.2
|
||||
# via pydantic
|
||||
pytest==8.3.4
|
||||
pytest==8.3.5
|
||||
# via
|
||||
# text-generation-integration-tests (pyproject.toml)
|
||||
# pytest-asyncio
|
||||
@ -95,7 +97,7 @@ sniffio==1.3.1
|
||||
# via
|
||||
# anyio
|
||||
# openai
|
||||
syrupy==4.8.1
|
||||
syrupy==4.9.0
|
||||
# via text-generation-integration-tests (pyproject.toml)
|
||||
text-generation==0.7.0
|
||||
# via text-generation-integration-tests (pyproject.toml)
|
||||
|
@ -97,6 +97,21 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyio"
|
||||
version = "4.8.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
|
||||
{ name = "idna" },
|
||||
{ name = "sniffio" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a3/73/199a98fc2dae33535d6b8e8e6ec01f8c1d76c9adb096c6b7d64823038cde/anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a", size = 181126 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-timeout"
|
||||
version = "5.0.1"
|
||||
@ -181,6 +196,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "distro"
|
||||
version = "1.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docker"
|
||||
version = "7.1.0"
|
||||
@ -276,6 +300,43 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/94/758680531a00d06e471ef649e4ec2ed6bf185356a7f9fbfbb7368a40bd49/fsspec-2025.2.0-py3-none-any.whl", hash = "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b", size = 184484 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.14.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.7"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "h11" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6a/41/d7d0a89eb493922c37d343b607bc1b5da7f5be7e383740b4753ad8943e90/httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", size = 85196 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/87/f5/72347bc88306acb359581ac4d52f23c0ef445b57157adedb9aee0cd689d2/httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd", size = 78551 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpx"
|
||||
version = "0.28.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "certifi" },
|
||||
{ name = "httpcore" },
|
||||
{ name = "idna" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "huggingface-hub"
|
||||
version = "0.29.0"
|
||||
@ -312,6 +373,50 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiter"
|
||||
version = "0.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1e/c2/e4562507f52f0af7036da125bb699602ead37a2332af0788f8e0a3417f36/jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893", size = 162604 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b0/82/39f7c9e67b3b0121f02a0b90d433626caa95a565c3d2449fea6bcfa3f5f5/jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad", size = 314540 },
|
||||
{ url = "https://files.pythonhosted.org/packages/01/07/7bf6022c5a152fca767cf5c086bb41f7c28f70cf33ad259d023b53c0b858/jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea", size = 321065 },
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/b2/de3f3446ecba7c48f317568e111cc112613da36c7b29a6de45a1df365556/jiter-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51", size = 341664 },
|
||||
{ url = "https://files.pythonhosted.org/packages/13/cf/6485a4012af5d407689c91296105fcdb080a3538e0658d2abf679619c72f/jiter-0.9.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538", size = 364635 },
|
||||
{ url = "https://files.pythonhosted.org/packages/0d/f7/4a491c568f005553240b486f8e05c82547340572d5018ef79414b4449327/jiter-0.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d", size = 406288 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d3/ca/f4263ecbce7f5e6bded8f52a9f1a66540b270c300b5c9f5353d163f9ac61/jiter-0.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12", size = 397499 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ac/a2/522039e522a10bac2f2194f50e183a49a360d5f63ebf46f6d890ef8aa3f9/jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51", size = 352926 },
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/67/306a5c5abc82f2e32bd47333a1c9799499c1c3a415f8dde19dbf876f00cb/jiter-0.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708", size = 384506 },
|
||||
{ url = "https://files.pythonhosted.org/packages/0f/89/c12fe7b65a4fb74f6c0d7b5119576f1f16c79fc2953641f31b288fad8a04/jiter-0.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5", size = 520621 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/2b/d57900c5c06e6273fbaa76a19efa74dbc6e70c7427ab421bf0095dfe5d4a/jiter-0.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678", size = 512613 },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/05/d8b90bfb21e58097d5a4e0224f2940568366f68488a079ae77d4b2653500/jiter-0.9.0-cp310-cp310-win32.whl", hash = "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4", size = 206613 },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/1d/5767f23f88e4f885090d74bbd2755518050a63040c0f59aa059947035711/jiter-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322", size = 208371 },
|
||||
{ url = "https://files.pythonhosted.org/packages/23/44/e241a043f114299254e44d7e777ead311da400517f179665e59611ab0ee4/jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af", size = 314654 },
|
||||
{ url = "https://files.pythonhosted.org/packages/fb/1b/a7e5e42db9fa262baaa9489d8d14ca93f8663e7f164ed5e9acc9f467fc00/jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58", size = 320909 },
|
||||
{ url = "https://files.pythonhosted.org/packages/60/bf/8ebdfce77bc04b81abf2ea316e9c03b4a866a7d739cf355eae4d6fd9f6fe/jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b", size = 341733 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/4e/754ebce77cff9ab34d1d0fa0fe98f5d42590fd33622509a3ba6ec37ff466/jiter-0.9.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b", size = 365097 },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/2c/6019587e6f5844c612ae18ca892f4cd7b3d8bbf49461ed29e384a0f13d98/jiter-0.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5", size = 406603 },
|
||||
{ url = "https://files.pythonhosted.org/packages/da/e9/c9e6546c817ab75a1a7dab6dcc698e62e375e1017113e8e983fccbd56115/jiter-0.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572", size = 396625 },
|
||||
{ url = "https://files.pythonhosted.org/packages/be/bd/976b458add04271ebb5a255e992bd008546ea04bb4dcadc042a16279b4b4/jiter-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15", size = 351832 },
|
||||
{ url = "https://files.pythonhosted.org/packages/07/51/fe59e307aaebec9265dbad44d9d4381d030947e47b0f23531579b9a7c2df/jiter-0.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419", size = 384590 },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/55/5dcd2693794d8e6f4889389ff66ef3be557a77f8aeeca8973a97a7c00557/jiter-0.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043", size = 520690 },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/d5/9f51dc90985e9eb251fbbb747ab2b13b26601f16c595a7b8baba964043bd/jiter-0.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965", size = 512649 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/e5/4e385945179bcf128fa10ad8dca9053d717cbe09e258110e39045c881fe5/jiter-0.9.0-cp311-cp311-win32.whl", hash = "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2", size = 206920 },
|
||||
{ url = "https://files.pythonhosted.org/packages/4c/47/5e0b94c603d8e54dd1faab439b40b832c277d3b90743e7835879ab663757/jiter-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd", size = 210119 },
|
||||
{ url = "https://files.pythonhosted.org/packages/af/d7/c55086103d6f29b694ec79156242304adf521577530d9031317ce5338c59/jiter-0.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11", size = 309203 },
|
||||
{ url = "https://files.pythonhosted.org/packages/b0/01/f775dfee50beb420adfd6baf58d1c4d437de41c9b666ddf127c065e5a488/jiter-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e", size = 319678 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ab/b8/09b73a793714726893e5d46d5c534a63709261af3d24444ad07885ce87cb/jiter-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2", size = 341816 },
|
||||
{ url = "https://files.pythonhosted.org/packages/35/6f/b8f89ec5398b2b0d344257138182cc090302854ed63ed9c9051e9c673441/jiter-0.9.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75", size = 364152 },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/ca/978cc3183113b8e4484cc7e210a9ad3c6614396e7abd5407ea8aa1458eef/jiter-0.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d", size = 406991 },
|
||||
{ url = "https://files.pythonhosted.org/packages/13/3a/72861883e11a36d6aa314b4922125f6ae90bdccc225cd96d24cc78a66385/jiter-0.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42", size = 395824 },
|
||||
{ url = "https://files.pythonhosted.org/packages/87/67/22728a86ef53589c3720225778f7c5fdb617080e3deaed58b04789418212/jiter-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc", size = 351318 },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/b9/f39728e2e2007276806d7a6609cda7fac44ffa28ca0d02c49a4f397cc0d9/jiter-0.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc", size = 384591 },
|
||||
{ url = "https://files.pythonhosted.org/packages/eb/8f/8a708bc7fd87b8a5d861f1c118a995eccbe6d672fe10c9753e67362d0dd0/jiter-0.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e", size = 520746 },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/1e/65680c7488bd2365dbd2980adaf63c562d3d41d3faac192ebc7ef5b4ae25/jiter-0.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d", size = 512754 },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/f3/fdc43547a9ee6e93c837685da704fb6da7dba311fc022e2766d5277dfde5/jiter-0.9.0-cp312-cp312-win32.whl", hash = "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06", size = 207075 },
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/9d/742b289016d155f49028fe1bfbeb935c9bf0ffeefdf77daf4a63a42bb72b/jiter-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0", size = 207999 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "multidict"
|
||||
version = "6.1.0"
|
||||
@ -411,6 +516,25 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/17/7f/d322a4125405920401450118dbdc52e0384026bd669939484670ce8b2ab9/numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4", size = 12839607 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openai"
|
||||
version = "1.66.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "distro" },
|
||||
{ name = "httpx" },
|
||||
{ name = "jiter" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "sniffio" },
|
||||
{ name = "tqdm" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "24.2"
|
||||
@ -420,6 +544,54 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pillow"
|
||||
version = "11.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f3/af/c097e544e7bd278333db77933e535098c259609c4eb3b85381109602fb5b/pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20", size = 46742715 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/50/1c/2dcea34ac3d7bc96a1fd1bd0a6e06a57c67167fec2cff8d95d88229a8817/pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8", size = 3229983 },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/ca/6bec3df25e4c88432681de94a3531cc738bd85dea6c7aa6ab6f81ad8bd11/pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192", size = 3101831 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d4/2c/668e18e5521e46eb9667b09e501d8e07049eb5bfe39d56be0724a43117e6/pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2", size = 4314074 },
|
||||
{ url = "https://files.pythonhosted.org/packages/02/80/79f99b714f0fc25f6a8499ecfd1f810df12aec170ea1e32a4f75746051ce/pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26", size = 4394933 },
|
||||
{ url = "https://files.pythonhosted.org/packages/81/aa/8d4ad25dc11fd10a2001d5b8a80fdc0e564ac33b293bdfe04ed387e0fd95/pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07", size = 4353349 },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/7a/cd0c3eaf4a28cb2a74bdd19129f7726277a7f30c4f8424cd27a62987d864/pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482", size = 4476532 },
|
||||
{ url = "https://files.pythonhosted.org/packages/8f/8b/a907fdd3ae8f01c7670dfb1499c53c28e217c338b47a813af8d815e7ce97/pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e", size = 4279789 },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/9a/9f139d9e8cccd661c3efbf6898967a9a337eb2e9be2b454ba0a09533100d/pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269", size = 4413131 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/68/0d8d461f42a3f37432203c8e6df94da10ac8081b6d35af1c203bf3111088/pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49", size = 2291213 },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/81/d0dff759a74ba87715509af9f6cb21fa21d93b02b3316ed43bda83664db9/pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a", size = 2625725 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/1f/8d50c096a1d58ef0584ddc37e6f602828515219e9d2428e14ce50f5ecad1/pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65", size = 2375213 },
|
||||
{ url = "https://files.pythonhosted.org/packages/dd/d6/2000bfd8d5414fb70cbbe52c8332f2283ff30ed66a9cde42716c8ecbe22c/pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457", size = 3229968 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/45/3fe487010dd9ce0a06adf9b8ff4f273cc0a44536e234b0fad3532a42c15b/pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35", size = 3101806 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/72/776b3629c47d9d5f1c160113158a7a7ad177688d3a1159cd3b62ded5a33a/pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2", size = 4322283 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/c2/e25199e7e4e71d64eeb869f5b72c7ddec70e0a87926398785ab944d92375/pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070", size = 4402945 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/ed/51d6136c9d5911f78632b1b86c45241c712c5a80ed7fa7f9120a5dff1eba/pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6", size = 4361228 },
|
||||
{ url = "https://files.pythonhosted.org/packages/48/a4/fbfe9d5581d7b111b28f1d8c2762dee92e9821bb209af9fa83c940e507a0/pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1", size = 4484021 },
|
||||
{ url = "https://files.pythonhosted.org/packages/39/db/0b3c1a5018117f3c1d4df671fb8e47d08937f27519e8614bbe86153b65a5/pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2", size = 4287449 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/58/bc128da7fea8c89fc85e09f773c4901e95b5936000e6f303222490c052f3/pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96", size = 4419972 },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/bb/58f34379bde9fe197f51841c5bbe8830c28bbb6d3801f16a83b8f2ad37df/pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f", size = 2291201 },
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/c6/fce9255272bcf0c39e15abd2f8fd8429a954cf344469eaceb9d0d1366913/pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761", size = 2625686 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c8/52/8ba066d569d932365509054859f74f2a9abee273edcef5cd75e4bc3e831e/pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71", size = 2375194 },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/20/9ce6ed62c91c073fcaa23d216e68289e19d95fb8188b9fb7a63d36771db8/pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a", size = 3226818 },
|
||||
{ url = "https://files.pythonhosted.org/packages/b9/d8/f6004d98579a2596c098d1e30d10b248798cceff82d2b77aa914875bfea1/pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b", size = 3101662 },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/d9/892e705f90051c7a2574d9f24579c9e100c828700d78a63239676f960b74/pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3", size = 4329317 },
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/aa/7f29711f26680eab0bcd3ecdd6d23ed6bce180d82e3f6380fb7ae35fcf3b/pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a", size = 4412999 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c8/c4/8f0fe3b9e0f7196f6d0bbb151f9fba323d72a41da068610c4c960b16632a/pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1", size = 4368819 },
|
||||
{ url = "https://files.pythonhosted.org/packages/38/0d/84200ed6a871ce386ddc82904bfadc0c6b28b0c0ec78176871a4679e40b3/pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f", size = 4496081 },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/9c/9bcd66f714d7e25b64118e3952d52841a4babc6d97b6d28e2261c52045d4/pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91", size = 4296513 },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/61/ada2a226e22da011b45f7104c95ebda1b63dcbb0c378ad0f7c2a710f8fd2/pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c", size = 4431298 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e7/c4/fc6e86750523f367923522014b821c11ebc5ad402e659d8c9d09b3c9d70c/pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6", size = 2291630 },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/5c/2104299949b9d504baf3f4d35f73dbd14ef31bbd1ddc2c1b66a5b7dfda44/pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf", size = 2626369 },
|
||||
{ url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 },
|
||||
{ url = "https://files.pythonhosted.org/packages/fa/c5/389961578fb677b8b3244fcd934f720ed25a148b9a5cc81c91bdf59d8588/pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90", size = 3198345 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/fa/803c0e50ffee74d4b965229e816af55276eac1d5806712de86f9371858fd/pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb", size = 3072938 },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/67/2a3a5f8012b5d8c63fe53958ba906c1b1d0482ebed5618057ef4d22f8076/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442", size = 3400049 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/a0/514f0d317446c98c478d1872497eb92e7cde67003fed74f696441e647446/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83", size = 3422431 },
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/00/20f40a935514037b7d3f87adfc87d2c538430ea625b63b3af8c3f5578e72/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f", size = 3446208 },
|
||||
{ url = "https://files.pythonhosted.org/packages/28/3c/7de681727963043e093c72e6c3348411b0185eab3263100d4490234ba2f6/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73", size = 3509746 },
|
||||
{ url = "https://files.pythonhosted.org/packages/41/67/936f9814bdd74b2dfd4822f1f7725ab5d8ff4103919a1664eb4874c58b2f/pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0", size = 2626353 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.5.0"
|
||||
@ -656,6 +828,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sniffio"
|
||||
version = "1.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syrupy"
|
||||
version = "4.8.1"
|
||||
@ -688,7 +869,10 @@ version = "2.0.1"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "docker" },
|
||||
{ name = "huggingface-hub" },
|
||||
{ name = "numpy" },
|
||||
{ name = "openai" },
|
||||
{ name = "pillow" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-asyncio" },
|
||||
@ -699,7 +883,10 @@ dependencies = [
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "docker", specifier = ">=7" },
|
||||
{ name = "huggingface-hub", specifier = ">=0.29" },
|
||||
{ name = "numpy", specifier = ">=2.0" },
|
||||
{ name = "openai", specifier = ">=1.65" },
|
||||
{ name = "pillow", specifier = ">=11.1.0" },
|
||||
{ name = "pydantic", specifier = ">2,<3" },
|
||||
{ name = "pytest", specifier = ">=8.3.0" },
|
||||
{ name = "pytest-asyncio", specifier = ">=0.23.1" },
|
||||
@ -741,7 +928,7 @@ name = "tqdm"
|
||||
version = "4.67.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
|
||||
wheels = [
|
||||
|
@ -97,11 +97,10 @@ fn get_config(
|
||||
let filename = if !path.exists() {
|
||||
// Assume it's a hub id
|
||||
|
||||
let mut builder = if let Ok(token) = std::env::var("HF_TOKEN") {
|
||||
let mut builder = ApiBuilder::from_env();
|
||||
if let Ok(token) = std::env::var("HF_TOKEN") {
|
||||
// env variable has precedence over on file token.
|
||||
ApiBuilder::new().with_token(Some(token))
|
||||
} else {
|
||||
ApiBuilder::new()
|
||||
builder = builder.with_token(Some(token))
|
||||
};
|
||||
if let Ok(origin) = env::var("HF_HUB_USER_AGENT_ORIGIN") {
|
||||
builder = builder.with_user_agent("origin", origin.as_str());
|
||||
|
@ -1522,7 +1522,7 @@ pub async fn run(
|
||||
|
||||
// Shared API builder initialization
|
||||
let api_builder = || {
|
||||
let mut builder = ApiBuilder::new().with_progress(false);
|
||||
let mut builder = ApiBuilder::from_env().with_progress(false);
|
||||
if let Some(token) = authorization_token {
|
||||
builder = builder.with_token(Some(token));
|
||||
}
|
||||
|
@ -699,7 +699,7 @@ fn image_tokens(
|
||||
// TODO: prefer using the config to determine the number of features
|
||||
let num_mm_soft_tokens_per_image = 256;
|
||||
format!(
|
||||
"\n\n<start_of_image>{:?}<end_of_image>\n\n",
|
||||
"\n\n<start_of_image>{}<end_of_image>\n\n",
|
||||
"<image_soft_token>".repeat(num_mm_soft_tokens_per_image)
|
||||
)
|
||||
}
|
||||
|
@ -205,7 +205,6 @@ class LoraWeights(AdapterWeights):
|
||||
lora_a_list = [None] * nlayers
|
||||
lora_b_list = [None] * nlayers
|
||||
|
||||
# import ipdb; ipdb.set_trace()
|
||||
for layer_id in range(nlayers):
|
||||
key = (layer_id, layer_type)
|
||||
if key not in target_to_layer:
|
||||
|
@ -38,6 +38,7 @@ def paged_attention(
|
||||
*,
|
||||
kv_scales: KVScales,
|
||||
softcap: Optional[float] = None,
|
||||
window_size_left: Optional[int] = -1,
|
||||
):
|
||||
# Adapted from: https://github.com/vllm-project/vllm/blob/f8a1e39fae05ca610be8d5a78be9d40f5274e5fc/vllm/model_executor/layers/attention.py
|
||||
# Copyright 2023 The vLLM team. All rights
|
||||
@ -79,12 +80,15 @@ def paged_attention(
|
||||
sm_scale=softmax_scale,
|
||||
k_scale=kv_scales.key_scale_cpu if can_scale else 1.0,
|
||||
v_scale=kv_scales.value_scale_cpu if can_scale else 1.0,
|
||||
window_left=window_size_left,
|
||||
)
|
||||
elif ATTENTION == "flashdecoding":
|
||||
max_q = 1
|
||||
max_k = max_s
|
||||
import flash_attn_2_cuda
|
||||
|
||||
window_size_right = -1 if window_size_left == -1 else 0
|
||||
|
||||
# TODO fixme when flash contains the fix.
|
||||
# Number of splits is not correctly handled
|
||||
# by the current path
|
||||
@ -109,8 +113,8 @@ def paged_attention(
|
||||
softmax_scale,
|
||||
False, # zero_tensors
|
||||
True, # causal
|
||||
-1, # Window_left
|
||||
-1, # Window right
|
||||
window_size_left, # Window_left
|
||||
window_size_right, # Window right
|
||||
softcap,
|
||||
False, # return softmax
|
||||
None, # generator
|
||||
@ -253,6 +257,7 @@ def attention(
|
||||
sm_scale=softmax_scale,
|
||||
k_scale=kv_scales.key_scale_cpu if can_scale else 1.0,
|
||||
v_scale=kv_scales.value_scale_cpu if can_scale else 1.0,
|
||||
window_left=window_size_left,
|
||||
)
|
||||
|
||||
# If we are using flashdecoding or paged, we always use flash-attn for
|
||||
|
@ -52,7 +52,6 @@ def use_prefill_with_paged_kv_state(
|
||||
page_size: int,
|
||||
kv_dtype: torch.dtype,
|
||||
q_dtype: torch.dtype,
|
||||
window_left: int,
|
||||
):
|
||||
"""
|
||||
Context manager to set the active flashinfer prefill state to the given
|
||||
@ -95,7 +94,6 @@ def use_prefill_with_paged_kv_state(
|
||||
kv_data_type=kv_dtype,
|
||||
q_data_type=q_dtype,
|
||||
page_size=page_size,
|
||||
window_left=-1 if window_left is None else window_left,
|
||||
)
|
||||
yield
|
||||
finally:
|
||||
@ -172,7 +170,6 @@ def use_decode_state(
|
||||
page_size: int,
|
||||
kv_cache_dtype: torch.dtype,
|
||||
q_dtype: torch.dtype,
|
||||
window_left: int,
|
||||
):
|
||||
"""
|
||||
Context manager to set the active flashinfer decoding state to the given
|
||||
@ -209,7 +206,6 @@ def use_decode_state(
|
||||
page_size=page_size,
|
||||
data_type=kv_cache_dtype,
|
||||
q_data_type=q_dtype,
|
||||
window_left=-1 if window_left is None else window_left,
|
||||
)
|
||||
yield
|
||||
finally:
|
||||
|
@ -78,6 +78,7 @@ def paged_attention(
|
||||
*,
|
||||
kv_scales: KVScales,
|
||||
softcap: Optional[float] = None,
|
||||
window_size_left: Optional[int] = -1,
|
||||
):
|
||||
if softcap is not None:
|
||||
raise NotImplementedError("softcap is not available in IPEX")
|
||||
|
@ -59,6 +59,7 @@ def paged_attention(
|
||||
*,
|
||||
kv_scales: KVScales,
|
||||
softcap: Optional[float] = None,
|
||||
window_size_left: Optional[int] = -1,
|
||||
):
|
||||
# Adapted from: https://github.com/vllm-project/vllm/blob/f8a1e39fae05ca610be8d5a78be9d40f5274e5fc/vllm/model_executor/layers/attention.py
|
||||
# Copyright 2023 The vLLM team. All rights
|
||||
@ -82,6 +83,8 @@ def paged_attention(
|
||||
max_k = max_s
|
||||
import flash_attn_2_cuda
|
||||
|
||||
window_size_right = -1 if window_size_left == -1 else 0
|
||||
|
||||
if softcap is None:
|
||||
softcap = 0.0
|
||||
out = flash_attn_2_cuda.varlen_fwd(
|
||||
@ -101,8 +104,8 @@ def paged_attention(
|
||||
softmax_scale,
|
||||
False, # zero_tensors
|
||||
True, # causal
|
||||
-1, # Window_left
|
||||
-1, # Window right
|
||||
window_size_left, # Window_left
|
||||
window_size_right, # Window right
|
||||
softcap,
|
||||
False, # return softmax
|
||||
None, # generator
|
||||
|
@ -272,12 +272,12 @@ class ModelType(enum.Enum):
|
||||
GEMMA3 = {
|
||||
"type": "gemma3",
|
||||
"name": "Gemma3",
|
||||
"url": "https://huggingface.co/collections/google/gemma-3",
|
||||
"url": "https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d",
|
||||
}
|
||||
GEMMA3_TEXT = {
|
||||
"type": "gemma3_text",
|
||||
"name": "Gemma3 Text",
|
||||
"url": "https://huggingface.co/collections/google/gemma-3",
|
||||
"url": "https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d",
|
||||
}
|
||||
COHERE = {
|
||||
"type": "cohere",
|
||||
|
@ -287,6 +287,7 @@ class FlashGemma2Attention(torch.nn.Module):
|
||||
max_s,
|
||||
softcap=self.softcap,
|
||||
kv_scales=self.kv_scales,
|
||||
window_size_left=self.window_size,
|
||||
)
|
||||
|
||||
return self.o_proj(
|
||||
|
@ -281,22 +281,12 @@ class FlashGemma3Attention(torch.nn.Module):
|
||||
padded_query = padded_query.transpose(1, 2).contiguous()
|
||||
padded_key = padded_key.transpose(1, 2).contiguous()
|
||||
padded_value = padded_value.transpose(1, 2).contiguous()
|
||||
zeros_to_add = torch.zeros(
|
||||
padded_key.size(0),
|
||||
self.num_key_value_heads,
|
||||
1,
|
||||
self.head_size,
|
||||
dtype=padded_key.dtype,
|
||||
device=padded_key.device,
|
||||
)
|
||||
key_states = torch.cat([padded_key, zeros_to_add], dim=2)
|
||||
value_states = torch.cat([padded_value, zeros_to_add], dim=2)
|
||||
|
||||
# Compute attention
|
||||
attn_output = F.scaled_dot_product_attention(
|
||||
padded_query,
|
||||
key_states,
|
||||
value_states,
|
||||
padded_key,
|
||||
padded_value,
|
||||
attn_mask=attention_mask,
|
||||
scale=self.softmax_scale,
|
||||
enable_gqa=self.enable_gqa,
|
||||
@ -327,6 +317,7 @@ class FlashGemma3Attention(torch.nn.Module):
|
||||
max_s,
|
||||
softcap=self.softcap,
|
||||
kv_scales=self.kv_scales,
|
||||
window_size_left=self.window_size,
|
||||
)
|
||||
|
||||
return self.o_proj(
|
||||
@ -513,6 +504,7 @@ class FlashGemma3Model(torch.nn.Module):
|
||||
max_s: int,
|
||||
adapter_data: Optional[torch.Tensor] = None,
|
||||
attention_mask: Optional[torch.Tensor] = None,
|
||||
attention_mask_local: Optional[torch.Tensor] = None,
|
||||
) -> torch.Tensor:
|
||||
hidden_states = inputs_embeds
|
||||
|
||||
@ -525,25 +517,6 @@ class FlashGemma3Model(torch.nn.Module):
|
||||
position_ids, max_s, hidden_states.dtype
|
||||
)
|
||||
|
||||
# apply sliding window mask if needed
|
||||
if layer.self_attn.window_size > 0 and attention_mask is not None:
|
||||
min_dtype = torch.finfo(hidden_states.dtype).min
|
||||
# prefill may be larger than sliding window
|
||||
effective_seq_len = max(
|
||||
position_ids.shape[0], self.layers[i].self_attn.window_size
|
||||
)
|
||||
sliding_window_mask = torch.tril(
|
||||
torch.ones_like(attention_mask, dtype=torch.bool),
|
||||
diagonal=-self.layers[i].self_attn.window_size,
|
||||
)
|
||||
attention_mask = torch.where(
|
||||
sliding_window_mask, min_dtype, attention_mask
|
||||
)
|
||||
offset = max(0, position_ids.shape[0] - effective_seq_len)
|
||||
attention_mask = attention_mask[
|
||||
:, :, offset : offset + effective_seq_len
|
||||
]
|
||||
|
||||
hidden_states, residual = layer(
|
||||
hidden_states,
|
||||
residual,
|
||||
@ -556,7 +529,11 @@ class FlashGemma3Model(torch.nn.Module):
|
||||
seqlen,
|
||||
max_s,
|
||||
adapter_data,
|
||||
attention_mask,
|
||||
(
|
||||
attention_mask
|
||||
if self.layers[i].self_attn.window_size == -1
|
||||
else attention_mask_local
|
||||
),
|
||||
)
|
||||
|
||||
hidden_states, _ = self.norm(hidden_states, residual)
|
||||
@ -723,24 +700,6 @@ class Gemma3ForConditionalGeneration(nn.Module):
|
||||
config.pad_token_id if config.pad_token_id is not None else -1
|
||||
)
|
||||
|
||||
def get_image_token_mask(self, input_ids):
|
||||
device = input_ids.device
|
||||
|
||||
start_token_id = self.config.boi_token_index
|
||||
K = self.config.mm_tokens_per_image
|
||||
|
||||
mask = torch.zeros_like(input_ids, dtype=torch.bool, device=device)
|
||||
start_positions = (input_ids == start_token_id).nonzero(as_tuple=True)[0]
|
||||
mask_indices = start_positions.unsqueeze(1) + torch.arange(
|
||||
1, K + 1, device=device
|
||||
).unsqueeze(0)
|
||||
|
||||
valid_mask = mask_indices < input_ids.size(0)
|
||||
mask_indices = mask_indices[valid_mask]
|
||||
mask[mask_indices] = True
|
||||
|
||||
return mask
|
||||
|
||||
def get_attention_mask(
|
||||
self, input_ids, max_s, cu_seqlen_prefill, dtype, image_token_mask
|
||||
):
|
||||
@ -751,7 +710,7 @@ class Gemma3ForConditionalGeneration(nn.Module):
|
||||
batch_size = len(lengths)
|
||||
|
||||
sequence_length = max(lengths)
|
||||
target_length = max_s
|
||||
target_length = sequence_length
|
||||
# Create the padding mask from the computed lengths.
|
||||
# pad_mask: [batch, sequence_length] where True indicates valid tokens.
|
||||
seq_range = torch.arange(sequence_length, device=device).unsqueeze(0)
|
||||
@ -847,7 +806,7 @@ class Gemma3ForConditionalGeneration(nn.Module):
|
||||
|
||||
# # Determine the maximum sequence length (after padding) from query.
|
||||
# sequence_length = max(lengths)
|
||||
# target_length = max_s
|
||||
# target_length = sequence_length
|
||||
|
||||
# # Create the padding mask from the computed lengths.
|
||||
# # pad_mask: [batch, sequence_length] where True indicates valid tokens.
|
||||
@ -885,6 +844,26 @@ class Gemma3ForConditionalGeneration(nn.Module):
|
||||
# input_ids.device
|
||||
# )
|
||||
|
||||
if attention_mask is not None:
|
||||
min_dtype = torch.finfo(inputs_embeds.dtype).min
|
||||
# prefill may be larger than sliding window
|
||||
effective_seq_len = max(
|
||||
position_ids.shape[0], self.config.text_config.sliding_window
|
||||
)
|
||||
sliding_window_mask = torch.tril(
|
||||
torch.ones_like(attention_mask, dtype=torch.bool),
|
||||
diagonal=-self.config.text_config.sliding_window,
|
||||
)
|
||||
attention_mask_local = torch.where(
|
||||
sliding_window_mask, min_dtype, attention_mask
|
||||
)
|
||||
offset = max(0, position_ids.shape[0] - effective_seq_len)
|
||||
attention_mask_local = attention_mask_local[
|
||||
:, :, :, offset : offset + effective_seq_len
|
||||
]
|
||||
else:
|
||||
attention_mask_local = None
|
||||
|
||||
hidden_states = self.text_model.model(
|
||||
inputs_embeds=inputs_embeds,
|
||||
position_ids=position_ids,
|
||||
@ -895,6 +874,7 @@ class Gemma3ForConditionalGeneration(nn.Module):
|
||||
seqlen=seqlen,
|
||||
max_s=max_s,
|
||||
attention_mask=attention_mask,
|
||||
attention_mask_local=attention_mask_local,
|
||||
)
|
||||
|
||||
if lm_head_indices is not None:
|
||||
|
@ -242,6 +242,7 @@ class MistralAttention(torch.nn.Module):
|
||||
seqlen,
|
||||
max_s,
|
||||
kv_scales=self.kv_scales,
|
||||
window_size_left=self.max_past,
|
||||
)
|
||||
|
||||
return self.o_proj(
|
||||
|
@ -290,6 +290,7 @@ class MixtralAttention(torch.nn.Module):
|
||||
seqlen,
|
||||
max_s,
|
||||
kv_scales=self.kv_scales,
|
||||
window_size_left=self.max_past,
|
||||
)
|
||||
|
||||
return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
|
||||
|
@ -31,7 +31,7 @@ class PaliGemmaForConditionalGeneration(nn.Module):
|
||||
super().__init__()
|
||||
config.vision_config.quantize = config.quantize
|
||||
self.vision_tower = load_vision_model(
|
||||
prefix="vision_model" if not prefix else f"{prefix}.vision_model",
|
||||
prefix="vision_tower" if not prefix else f"{prefix}.vision_tower",
|
||||
config=config.vision_config,
|
||||
weights=weights,
|
||||
)
|
||||
|
@ -74,7 +74,7 @@ class Qwen2Attention(torch.nn.Module):
|
||||
weights,
|
||||
):
|
||||
super().__init__()
|
||||
self.max_past = (
|
||||
self.window_size = (
|
||||
config.sliding_window if config.sliding_window is not None else -1
|
||||
)
|
||||
self.num_heads = config.num_attention_heads
|
||||
@ -172,7 +172,7 @@ class Qwen2Attention(torch.nn.Module):
|
||||
seqlen=seqlen,
|
||||
block_tables=block_tables,
|
||||
softmax_scale=self.softmax_scale,
|
||||
window_size_left=self.max_past,
|
||||
window_size_left=self.window_size,
|
||||
)
|
||||
# Decode
|
||||
else:
|
||||
@ -185,6 +185,7 @@ class Qwen2Attention(torch.nn.Module):
|
||||
seqlen,
|
||||
max_s,
|
||||
kv_scales=self.kv_scales,
|
||||
window_size_left=self.window_size,
|
||||
)
|
||||
|
||||
return self.o_proj(
|
||||
@ -405,10 +406,10 @@ class Qwen2ForCausalLM(torch.nn.Module):
|
||||
weights=weights,
|
||||
)
|
||||
|
||||
self.max_past = config.sliding_window
|
||||
self.max_past_tensor = (
|
||||
self.window_size = config.sliding_window
|
||||
self.window_size_tensor = (
|
||||
torch.tensor(config.sliding_window, device=weights.device)
|
||||
if self.max_past is not None
|
||||
if self.window_size is not None
|
||||
else None
|
||||
)
|
||||
|
||||
@ -430,10 +431,10 @@ class Qwen2ForCausalLM(torch.nn.Module):
|
||||
if prefill_cache_indices is not None:
|
||||
# Slots also need to be sliced as they have the same size as the whole kv tensor
|
||||
slots = slots[prefill_cache_indices]
|
||||
elif self.max_past is not None:
|
||||
elif self.window_size is not None:
|
||||
# Clamp in decode mode as paged attention requires clamped values whereas the flash attention
|
||||
# kernel requires the true values
|
||||
seqlen = seqlen.clamp(max=self.max_past_tensor)
|
||||
seqlen = seqlen.clamp(max=self.window_size_tensor)
|
||||
|
||||
inputs_embeds = self.embed_tokens(input_ids)
|
||||
|
||||
|
@ -291,6 +291,7 @@ class Starcoder2Attention(torch.nn.Module):
|
||||
seqlen,
|
||||
max_s,
|
||||
kv_scales=self.kv_scales,
|
||||
window_size_left=self.max_past,
|
||||
)
|
||||
|
||||
return self.o_proj(
|
||||
|
@ -263,7 +263,7 @@ class Gemma3ImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
do_convert_rgb: bool = None,
|
||||
do_convert_rgb: bool = True,
|
||||
do_pan_and_scan: bool = None,
|
||||
pan_and_scan_min_crop_size: int = None,
|
||||
pan_and_scan_max_num_crops: int = None,
|
||||
|
@ -82,7 +82,7 @@ class Gemma3Processor(ProcessorMixin):
|
||||
do_rescale=False,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
)
|
||||
# import ipdb; ipdb.set_trace()
|
||||
|
||||
self.image_token_id = tokenizer.image_token_id
|
||||
image_tokens_expanded = "".join(
|
||||
[tokenizer.image_token] * num_mm_soft_tokens_per_image
|
||||
@ -91,8 +91,6 @@ class Gemma3Processor(ProcessorMixin):
|
||||
f"\n\n{tokenizer.boi_token}{image_tokens_expanded}{tokenizer.eoi_token}\n\n"
|
||||
)
|
||||
|
||||
# import ipdb; ipdb.set_trace()
|
||||
|
||||
self.image_processor = image_processor
|
||||
self.tokenizer = tokenizer
|
||||
self.chat_template = chat_template
|
||||
|
@ -633,7 +633,7 @@ class Qwen2_5VisionModel(nn.Module):
|
||||
config=config,
|
||||
weights=weights,
|
||||
)
|
||||
# import ipdb; ipdb.set_trace()
|
||||
|
||||
self.temporal_patch_size = config.temporal_patch_size
|
||||
self.spatial_patch_size = config.spatial_patch_size
|
||||
self.in_channels = config.in_channels
|
||||
|
@ -83,24 +83,11 @@ from text_generation_server.models.metadata_kernels import (
|
||||
|
||||
tracer = trace.get_tracer(__name__)
|
||||
|
||||
# Will be set in init
|
||||
SLIDING_WINDOW: Optional[int] = None
|
||||
|
||||
|
||||
def small_power_of_2(n: int):
|
||||
return 1 << ((n - 1).bit_length() - 1)
|
||||
|
||||
|
||||
def set_sliding_window(sliding_window: int):
|
||||
global SLIDING_WINDOW
|
||||
SLIDING_WINDOW = sliding_window
|
||||
|
||||
|
||||
def get_sliding_windows() -> int:
|
||||
global SLIDING_WINDOW
|
||||
return SLIDING_WINDOW
|
||||
|
||||
|
||||
def init_cpu_threads_env(rank_id: int, world_size: int):
|
||||
import importlib.util
|
||||
|
||||
@ -1002,10 +989,8 @@ class FlashCausalLMBatch(Batch):
|
||||
self.slot_indices,
|
||||
)
|
||||
|
||||
sliding_window = get_sliding_windows()
|
||||
position_ids = []
|
||||
slot_indices = []
|
||||
prefill_cache_indices = []
|
||||
all_prefill_logprobs = True
|
||||
no_prefill_logprobs = True
|
||||
prefill_cu_outlens = [0]
|
||||
@ -1064,14 +1049,6 @@ class FlashCausalLMBatch(Batch):
|
||||
# Update
|
||||
cumulative_slot_tokens += len(request_slots)
|
||||
|
||||
# Create tensor to slice into the kv tensor in prefill
|
||||
if sliding_window is not None:
|
||||
request_prefill_cache_indices = torch.arange(
|
||||
cumulative_length + max(0, input_length - sliding_window),
|
||||
cumulative_length + input_length,
|
||||
dtype=torch.int64,
|
||||
)
|
||||
|
||||
# Prefill logprobs is ignored if the request is done prefilling
|
||||
prefill_logprobs = r.prefill_logprobs and request_prefilling
|
||||
|
||||
@ -1085,9 +1062,6 @@ class FlashCausalLMBatch(Batch):
|
||||
prefill_cu_outlens.append(prefill_out_cumulative_length + 1)
|
||||
prefill_out_cumulative_length += 1
|
||||
|
||||
if sliding_window is not None:
|
||||
prefill_cache_indices.append(request_prefill_cache_indices)
|
||||
|
||||
ADAPTER_TO_INDEX = get_adapter_to_index()
|
||||
if ADAPTER_TO_INDEX:
|
||||
adapter_index = ADAPTER_TO_INDEX.get(r.adapter_id, 0)
|
||||
@ -1151,24 +1125,18 @@ class FlashCausalLMBatch(Batch):
|
||||
position_ids = torch.cat(position_ids)
|
||||
if slot_indices:
|
||||
slot_indices = torch.cat(slot_indices)
|
||||
if sliding_window is not None:
|
||||
prefill_cache_indices = torch.cat(prefill_cache_indices)
|
||||
else:
|
||||
if position_ids:
|
||||
position_ids = position_ids[0]
|
||||
if slot_indices:
|
||||
slot_indices = slot_indices[0]
|
||||
if sliding_window is not None:
|
||||
prefill_cache_indices = prefill_cache_indices[0]
|
||||
|
||||
if not has_triton():
|
||||
self.position_ids = position_ids.to(device)
|
||||
self.slot_indices = slot_indices.to(device)
|
||||
|
||||
self.prefill_cu_outlens = prefill_cu_outlens
|
||||
self.prefill_cache_indices = (
|
||||
prefill_cache_indices.to(device) if sliding_window is not None else None
|
||||
)
|
||||
self.prefill_cache_indices = None
|
||||
|
||||
if all_prefill_logprobs:
|
||||
prefill_head_indices = None
|
||||
@ -1306,9 +1274,7 @@ class FlashCausalLM(Model):
|
||||
if text_config is not None:
|
||||
config = text_config
|
||||
|
||||
if getattr(config, "sliding_window", None) is not None:
|
||||
set_sliding_window(config.sliding_window)
|
||||
else:
|
||||
if getattr(config, "sliding_window", None) is None:
|
||||
config.sliding_window = None
|
||||
|
||||
self.num_layers = config.num_hidden_layers
|
||||
@ -2500,7 +2466,6 @@ class FlashCausalLM(Model):
|
||||
page_size=BLOCK_SIZE,
|
||||
kv_dtype=self.kv_cache_dtype,
|
||||
q_dtype=self.dtype,
|
||||
window_left=self.sliding_window,
|
||||
)
|
||||
else:
|
||||
assert input_lengths_tensor is not None
|
||||
@ -2514,5 +2479,4 @@ class FlashCausalLM(Model):
|
||||
page_size=BLOCK_SIZE,
|
||||
kv_cache_dtype=self.kv_cache_dtype,
|
||||
q_dtype=self.dtype,
|
||||
window_left=self.sliding_window,
|
||||
)
|
||||
|
@ -110,7 +110,7 @@ class Model(ABC):
|
||||
requires_padding=self.requires_padding,
|
||||
dtype=str(self.dtype),
|
||||
device_type=self.device.type,
|
||||
window_size=self.sliding_window,
|
||||
window_size=None, # Setting this parameter to None disables the block logic with sliding window.
|
||||
speculate=self.speculate,
|
||||
support_chunking=self.support_chunking,
|
||||
use_prefix_caching=PREFIX_CACHING,
|
||||
|
Loading…
Reference in New Issue
Block a user