From 8094de91fcdc01e49902443ff516da3029cb0c8a Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Sat, 5 Apr 2025 15:33:30 +0000 Subject: [PATCH] Add tests --- docs/source/supported_models.md | 1 + .../test_flash_llama4.json | 613 ++++++++++++++++++ ...est_flash_llama4_image_base64_rgb_jpg.json | 26 + ...est_flash_llama4_image_base64_rgb_png.json | 26 + .../test_flash_llama4_image_base64_rgba.json | 26 + .../test_flash_llama4_image_cow.json | 26 + .../test_flash_llama4_image_cow_dog.json | 26 + .../models/test_transformers_llama4.py | 155 +++++ server/pyproject.toml | 3 +- .../text_generation_server/layers/rotary.py | 4 +- .../models/transformers_flash_vlm.py | 1 + server/uv.lock | 25 +- 12 files changed, 925 insertions(+), 7 deletions(-) create mode 100644 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4.json create mode 100644 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_jpg.json create mode 100644 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_png.json create mode 100644 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgba.json create mode 100644 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow.json create mode 100644 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow_dog.json create mode 100644 integration-tests/models/test_transformers_llama4.py diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md index f168fd76..98088b10 100644 --- a/docs/source/supported_models.md +++ b/docs/source/supported_models.md @@ -9,6 +9,7 @@ Text Generation Inference enables serving optimized models. The following sectio - [Idefics 3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (Multimodal) - [Llava Next (1.6)](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf) (Multimodal) - [Llama](https://huggingface.co/collections/meta-llama/llama-31-669fc079a0c406a149a5738f) +- [Llama4](https://huggingface.co/collections/ll-re/llama-4-67f03a4fd5f976f3d443fcfd) - [Phi 3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) - [Granite](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct) - [Gemma](https://huggingface.co/google/gemma-7b) diff --git a/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4.json b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4.json new file mode 100644 index 00000000..eefcb353 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4.json @@ -0,0 +1,613 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 100, + "prefill": [], + "seed": null, + "tokens": [ + { + "id": 2721, + "logprob": -0.21582031, + "special": false, + "text": " people" + }, + { + "id": 21807, + "logprob": -0.26953125, + "special": false, + "text": " died" + }, + { + "id": 310, + "logprob": -0.95703125, + "special": false, + "text": " in" + }, + { + "id": 290, + "logprob": -1.3359375, + "special": false, + "text": " the" + }, + { + "id": 220, + "logprob": -1.3828125, + "special": false, + "text": " " + }, + { + "id": 7284, + "logprob": -0.011291504, + "special": false, + "text": "191" + }, + { + "id": 36, + "logprob": -0.011413574, + "special": false, + "text": "8" + }, + { + "id": 18938, + "logprob": -0.23242188, + "special": false, + "text": " flu" + }, + { + "id": 27650, + "logprob": -0.0010070801, + "special": false, + "text": " pandemic" + }, + { + "id": 26, + "logprob": -0.69140625, + "special": false, + "text": "." + }, + { + "id": 114059, + "logprob": -1.4375, + "special": false, + "text": " Estimating" + }, + { + "id": 290, + "logprob": -0.24316406, + "special": false, + "text": " the" + }, + { + "id": 10593, + "logprob": -0.37304688, + "special": false, + "text": " death" + }, + { + "id": 49973, + "logprob": -0.025390625, + "special": false, + "text": " toll" + }, + { + "id": 323, + "logprob": -0.27539062, + "special": false, + "text": " of" + }, + { + "id": 290, + "logprob": -0.057617188, + "special": false, + "text": " the" + }, + { + "id": 220, + "logprob": -0.040527344, + "special": false, + "text": " " + }, + { + "id": 7284, + "logprob": -0.00050735474, + "special": false, + "text": "191" + }, + { + "id": 36, + "logprob": -9.298325e-06, + "special": false, + "text": "8" + }, + { + "id": 18938, + "logprob": -0.09863281, + "special": false, + "text": " flu" + }, + { + "id": 27650, + "logprob": -0.0011749268, + "special": false, + "text": " pandemic" + }, + { + "id": 373, + "logprob": -0.32421875, + "special": false, + "text": " is" + }, + { + "id": 8210, + "logprob": -0.58203125, + "special": false, + "text": " difficult" + }, + { + "id": 2895, + "logprob": -0.40429688, + "special": false, + "text": " because" + }, + { + "id": 323, + "logprob": -1.2734375, + "special": false, + "text": " of" + }, + { + "id": 49119, + "logprob": -0.51171875, + "special": false, + "text": " incomplete" + }, + { + "id": 13308, + "logprob": -0.38085938, + "special": false, + "text": " records" + }, + { + "id": 341, + "logprob": -0.55859375, + "special": false, + "text": " and" + }, + { + "id": 2895, + "logprob": -0.765625, + "special": false, + "text": " because" + }, + { + "id": 323, + "logprob": -1.0, + "special": false, + "text": " of" + }, + { + "id": 290, + "logprob": -0.828125, + "special": false, + "text": " the" + }, + { + "id": 2304, + "logprob": -1.015625, + "special": false, + "text": " fact" + }, + { + "id": 511, + "logprob": -0.004638672, + "special": false, + "text": " that" + }, + { + "id": 2233, + "logprob": -0.953125, + "special": false, + "text": " many" + }, + { + "id": 323, + "logprob": -0.87890625, + "special": false, + "text": " of" + }, + { + "id": 290, + "logprob": -0.60546875, + "special": false, + "text": " the" + }, + { + "id": 6759, + "logprob": -1.6484375, + "special": false, + "text": " extra" + }, + { + "id": 40657, + "logprob": -0.00022125244, + "special": false, + "text": " deaths" + }, + { + "id": 1610, + "logprob": -0.67578125, + "special": false, + "text": " were" + }, + { + "id": 702, + "logprob": -0.30664062, + "special": false, + "text": " not" + }, + { + "id": 48692, + "logprob": -0.1953125, + "special": false, + "text": " attributed" + }, + { + "id": 328, + "logprob": -0.0079956055, + "special": false, + "text": " to" + }, + { + "id": 290, + "logprob": -0.515625, + "special": false, + "text": " the" + }, + { + "id": 18938, + "logprob": -0.0040893555, + "special": false, + "text": " flu" + }, + { + "id": 26, + "logprob": -0.083496094, + "special": false, + "text": "." + }, + { + "id": 13618, + "logprob": -0.515625, + "special": false, + "text": " Many" + }, + { + "id": 22215, + "logprob": -1.5703125, + "special": false, + "text": " experts" + }, + { + "id": 11081, + "logprob": -0.96875, + "special": false, + "text": " believe" + }, + { + "id": 511, + "logprob": -0.1171875, + "special": false, + "text": " that" + }, + { + "id": 290, + "logprob": -0.25195312, + "special": false, + "text": " the" + }, + { + "id": 220, + "logprob": -0.828125, + "special": false, + "text": " " + }, + { + "id": 7284, + "logprob": -0.00010967255, + "special": false, + "text": "191" + }, + { + "id": 36, + "logprob": -8.535385e-05, + "special": false, + "text": "8" + }, + { + "id": 18938, + "logprob": -0.056152344, + "special": false, + "text": " flu" + }, + { + "id": 27650, + "logprob": -0.0007095337, + "special": false, + "text": " pandemic" + }, + { + "id": 26132, + "logprob": -0.18847656, + "special": false, + "text": " killed" + }, + { + "id": 1867, + "logprob": -0.71484375, + "special": false, + "text": " between" + }, + { + "id": 220, + "logprob": -0.0062561035, + "special": false, + "text": " " + }, + { + "id": 1175, + "logprob": -0.009277344, + "special": false, + "text": "50" + }, + { + "id": 341, + "logprob": -0.15332031, + "special": false, + "text": " and" + }, + { + "id": 220, + "logprob": -8.34465e-07, + "special": false, + "text": " " + }, + { + "id": 1135, + "logprob": -0.00065612793, + "special": false, + "text": "100" + }, + { + "id": 5534, + "logprob": -1.4066696e-05, + "special": false, + "text": " million" + }, + { + "id": 2721, + "logprob": -0.0008392334, + "special": false, + "text": " people" + }, + { + "id": 26, + "logprob": -0.54296875, + "special": false, + "text": "." + }, + { + "id": 372, + "logprob": -1.8046875, + "special": false, + "text": " I" + }, + { + "id": 140680, + "logprob": -0.578125, + "special": false, + "text": "assistant" + }, + { + "id": 200006, + "logprob": 0.0, + "special": true, + "text": "<|header_end|>" + }, + { + "id": 368, + "logprob": 0.0, + "special": false, + "text": "\n\n" + }, + { + "id": 954, + "logprob": -0.032226562, + "special": false, + "text": "The" + }, + { + "id": 220, + "logprob": -4.4345856e-05, + "special": false, + "text": " " + }, + { + "id": 7284, + "logprob": 0.0, + "special": false, + "text": "191" + }, + { + "id": 36, + "logprob": 0.0, + "special": false, + "text": "8" + }, + { + "id": 18938, + "logprob": -0.015625, + "special": false, + "text": " flu" + }, + { + "id": 27650, + "logprob": 0.0, + "special": false, + "text": " pandemic" + }, + { + "id": 24, + "logprob": -0.0072021484, + "special": false, + "text": "," + }, + { + "id": 1437, + "logprob": -0.0001707077, + "special": false, + "text": " also" + }, + { + "id": 5711, + "logprob": 0.0, + "special": false, + "text": " known" + }, + { + "id": 486, + "logprob": 0.0, + "special": false, + "text": " as" + }, + { + "id": 290, + "logprob": -5.9604645e-07, + "special": false, + "text": " the" + }, + { + "id": 25836, + "logprob": -1.4305115e-06, + "special": false, + "text": " Spanish" + }, + { + "id": 18938, + "logprob": -0.0015029907, + "special": false, + "text": " flu" + }, + { + "id": 24, + "logprob": -0.0052490234, + "special": false, + "text": "," + }, + { + "id": 373, + "logprob": -0.3125, + "special": false, + "text": " is" + }, + { + "id": 26078, + "logprob": -0.21289062, + "special": false, + "text": " indeed" + }, + { + "id": 1085, + "logprob": -0.080078125, + "special": false, + "text": " one" + }, + { + "id": 323, + "logprob": 0.0, + "special": false, + "text": " of" + }, + { + "id": 290, + "logprob": 0.0, + "special": false, + "text": " the" + }, + { + "id": 2167, + "logprob": -0.20117188, + "special": false, + "text": " most" + }, + { + "id": 92679, + "logprob": -0.12695312, + "special": false, + "text": " devastating" + }, + { + "id": 854, + "logprob": -0.25976562, + "special": false, + "text": " public" + }, + { + "id": 4500, + "logprob": 0.0, + "special": false, + "text": " health" + }, + { + "id": 93079, + "logprob": -0.50390625, + "special": false, + "text": " crises" + }, + { + "id": 310, + "logprob": 0.0, + "special": false, + "text": " in" + }, + { + "id": 6023, + "logprob": -0.0015182495, + "special": false, + "text": " human" + }, + { + "id": 7068, + "logprob": 0.0, + "special": false, + "text": " history" + }, + { + "id": 26, + "logprob": -0.0012664795, + "special": false, + "text": "." + }, + { + "id": 114059, + "logprob": -0.004119873, + "special": false, + "text": " Estimating" + }, + { + "id": 290, + "logprob": -0.00033569336, + "special": false, + "text": " the" + }, + { + "id": 6318, + "logprob": -0.20117188, + "special": false, + "text": " exact" + } + ], + "top_tokens": null + }, + "generated_text": " people died in the 1918 flu pandemic. Estimating the death toll of the 1918 flu pandemic is difficult because of incomplete records and because of the fact that many of the extra deaths were not attributed to the flu. Many experts believe that the 1918 flu pandemic killed between 50 and 100 million people. Iassistant\n\nThe 1918 flu pandemic, also known as the Spanish flu, is indeed one of the most devastating public health crises in human history. Estimating the exact" +} diff --git a/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_jpg.json b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_jpg.json new file mode 100644 index 00000000..cab30700 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_jpg.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The image is a blank white space with no visible objects or features. It appears to be an empty or placeholder image, devoid of any content or visual elements.", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1743861910, + "id": "", + "model": "ll-re/Llama-4-Scout-17B-16E-Instruct", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 34, + "prompt_tokens": 166, + "total_tokens": 200 + } +} diff --git a/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_png.json b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_png.json new file mode 100644 index 00000000..9d418e81 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_png.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The image is a blank white space with no visible objects or features.", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1743861909, + "id": "", + "model": "ll-re/Llama-4-Scout-17B-16E-Instruct", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 15, + "prompt_tokens": 166, + "total_tokens": 181 + } +} diff --git a/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgba.json b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgba.json new file mode 100644 index 00000000..dd34797d --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgba.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "message": { + "content": "The image is a black background with no discernible objects or features. The image appears to be a blank or empty space, devoid of any visual elements.\n\n**Key Features:**\n\n* **Color:** The dominant color of the image is black.\n* **Objects:** There are no visible objects or shapes in the image.\n* **Background:** The background of the image is a solid black color.\n\n**Conclusion:**\nIn summary, the image is a simple and empty visual representation with a black background and no", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1743861909, + "id": "", + "model": "ll-re/Llama-4-Scout-17B-16E-Instruct", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 100, + "prompt_tokens": 166, + "total_tokens": 266 + } +} diff --git a/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow.json b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow.json new file mode 100644 index 00000000..e89c9925 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The image shows a brown cow standing on the beach with a white face and black and white marking on its ears. The cow has a white patch around its nose and mouth. The ocean and blue sky are in the background.", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1743863057, + "id": "", + "model": "ll-re/Llama-4-Scout-17B-16E-Instruct", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 46, + "prompt_tokens": 164, + "total_tokens": 210 + } +} diff --git a/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow_dog.json b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow_dog.json new file mode 100644 index 00000000..f5a4a02d --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow_dog.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The image does not depict a dog; it shows a cow standing on a beach. Therefore, there is no breed of a dog to identify.", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1743863056, + "id": "", + "model": "ll-re/Llama-4-Scout-17B-16E-Instruct", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 30, + "prompt_tokens": 168, + "total_tokens": 198 + } +} diff --git a/integration-tests/models/test_transformers_llama4.py b/integration-tests/models/test_transformers_llama4.py new file mode 100644 index 00000000..a73138d1 --- /dev/null +++ b/integration-tests/models/test_transformers_llama4.py @@ -0,0 +1,155 @@ +import base64 +from io import BytesIO +from PIL import Image + +import pytest + + +@pytest.fixture(scope="module") +def flash_llama4_handle(launcher): + with launcher("ll-re/Llama-4-Scout-17B-16E-Instruct", num_shard=8) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_llama4(flash_llama4_handle): + await flash_llama4_handle.health(300) + return flash_llama4_handle.client + + +async def test_flash_llama4(flash_llama4, response_snapshot): + response = await flash_llama4.generate( + "Hello I am doing a project on the 1918 flu pandemic and I am trying to find out how many", + seed=42, + max_new_tokens=100, + ) + + assert ( + response.generated_text + == " people died in the 1918 flu pandemic. Estimating the death toll of the 1918 flu pandemic is difficult because of incomplete records and because of the fact that many of the extra deaths were not attributed to the flu. Many experts believe that the 1918 flu pandemic killed between 50 and 100 million people. Iassistant\n\nThe 1918 flu pandemic, also known as the Spanish flu, is indeed one of the most devastating public health crises in human history. Estimating the exact" + ) + assert response.details.generated_tokens == 100 + assert response == response_snapshot + + +async def test_flash_llama4_image_cow_dog(flash_llama4, response_snapshot): + image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png" + response = await flash_llama4.chat( + seed=42, + messages=[ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": image_url}}, + { + "type": "text", + "text": "What is the breed of the dog in the image?", + }, + ], + }, + ], + max_tokens=100, + ) + + assert ( + response.choices[0].message.content + == "The image does not depict a dog; it shows a cow standing on a beach. Therefore, there is no breed of a dog to identify." + ) + assert response.usage["completion_tokens"] == 30 + assert response == response_snapshot + + +async def test_flash_llama4_image_cow(flash_llama4, response_snapshot): + image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png" + response = await flash_llama4.chat( + seed=42, + messages=[ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": image_url}}, + {"type": "text", "text": "What is shown in this image?"}, + ], + }, + ], + max_tokens=100, + ) + assert ( + response.choices[0].message.content + == "The image shows a brown cow standing on the beach with a white face and black and white marking on its ears. The cow has a white patch around its nose and mouth. The ocean and blue sky are in the background." + ) + assert response.usage["completion_tokens"] == 46 + assert response == response_snapshot + + +# Helper function to convert a Pillow image to a base64 data URL +def image_to_data_url(img: Image.Image, fmt: str) -> str: + buffer = BytesIO() + img.save(buffer, format=fmt) + img_data = buffer.getvalue() + b64_str = base64.b64encode(img_data).decode("utf-8") + mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg" + return f"data:{mime_type};base64,{b64_str}" + + +async def test_flash_llama4_image_base64_rgba(flash_llama4, response_snapshot): + # Create an empty 100x100 PNG image with alpha (transparent background) + img = Image.new("RGBA", (100, 100), (0, 0, 0, 0)) + data_url = image_to_data_url(img, "PNG") + response = await flash_llama4.chat( + seed=42, + messages=[ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_url}}, + { + "type": "text", + "text": "What do you see in this transparent image?", + }, + ], + }, + ], + max_tokens=100, + ) + assert response == response_snapshot + + +async def test_flash_llama4_image_base64_rgb_png(flash_llama4, response_snapshot): + # Create an empty 100x100 PNG image without alpha (white background) + img = Image.new("RGB", (100, 100), (255, 255, 255)) + data_url = image_to_data_url(img, "PNG") + response = await flash_llama4.chat( + seed=42, + messages=[ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_url}}, + {"type": "text", "text": "What do you see in this plain image?"}, + ], + }, + ], + max_tokens=100, + ) + assert response == response_snapshot + + +async def test_flash_llama4_image_base64_rgb_jpg(flash_llama4, response_snapshot): + # Create an empty 100x100 JPEG image (white background) + img = Image.new("RGB", (100, 100), (255, 255, 255)) + data_url = image_to_data_url(img, "JPEG") + response = await flash_llama4.chat( + seed=42, + messages=[ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_url}}, + {"type": "text", "text": "What do you see in this JPEG image?"}, + ], + }, + ], + max_tokens=100, + ) + assert response == response_snapshot \ No newline at end of file diff --git a/server/pyproject.toml b/server/pyproject.toml index e3ec734a..86cb4922 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -32,7 +32,8 @@ dependencies = [ "tokenizers>=0.20.3", "typer>=0.15.1", "transformers>=4.49.0", - "huggingface-hub>=0.29.0", + "huggingface-hub>=0.30.1", + "hf-xet>=1.0.0", ] [build-system] diff --git a/server/text_generation_server/layers/rotary.py b/server/text_generation_server/layers/rotary.py index 9afd9ff3..d312a8b8 100644 --- a/server/text_generation_server/layers/rotary.py +++ b/server/text_generation_server/layers/rotary.py @@ -264,8 +264,8 @@ class PositionRotaryEmbedding(nn.Module): # freqs = torch.einsum("i,j->ij", t, self.inv_freq) freqs = torch.outer(t, self.inv_freq.to(device=t.device)) - self._cos_cached = torch.cos(freqs) - self._sin_cached = torch.sin(freqs) + self._cos_cached = torch.cos(freqs).to(dtype) + self._sin_cached = torch.sin(freqs).to(dtype) def get_cos_sin(self, position_ids: torch.Tensor, max_s: int, dtype: torch.dtype): """ diff --git a/server/text_generation_server/models/transformers_flash_vlm.py b/server/text_generation_server/models/transformers_flash_vlm.py index f9eb554c..ff385017 100644 --- a/server/text_generation_server/models/transformers_flash_vlm.py +++ b/server/text_generation_server/models/transformers_flash_vlm.py @@ -564,4 +564,5 @@ class TransformersLlama4VlmCausalLM(TransformersFlashVlmCausalLM): input_ids, position_ids, cu_seqlen_prefill ) inputs["cache_position"] = position_ids + inputs["attention_mask"] = torch.zeros((1, 1, 1, 1), device=input_ids.device) return inputs \ No newline at end of file diff --git a/server/uv.lock b/server/uv.lock index d4bbb955..c3f3f089 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -707,9 +707,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240 }, ] +[[package]] +name = "hf-xet" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/46/db229dddc55121478105940b610fef1b466c414da02be9d4daa5602a2527/hf_xet-1.0.0.tar.gz", hash = "sha256:5e0ca891ce599fd753e7ffbdc182207d952a93e6252eeb92118475d6866bb093", size = 257192 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/0a/c16f8766fa3cd520292b1a765e9b50b8390bce4c2ed7657db9534551f5ed/hf_xet-1.0.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6106304f92bbce7c9b8509f6f735f2e8ce95e4dc32af8876e874c48b15ca1903", size = 5001841 }, + { url = "https://files.pythonhosted.org/packages/e3/9f/cca55edd85d03fc98c743bcc093965740a7440e909779c558039d6838f03/hf_xet-1.0.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:4d0bc7a3e6c1d21fcbb48e8726e3b19a2460e95971375e55e9a5f73ec7079a86", size = 4805318 }, + { url = "https://files.pythonhosted.org/packages/d1/0b/28bda7ac9d699dcfb96f628aa135ddca3f0f77e9716351aab2b83966f957/hf_xet-1.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23dee64f114ea9a272ff71a6a755e025d7a075a6a0dbf6da0990fe9211831ccf", size = 53504907 }, + { url = "https://files.pythonhosted.org/packages/cb/04/ef1f7249a813841d193cbab2ef4d1d7d67c66c61d21d45223a72fdc5c88e/hf_xet-1.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d5f160550508ed87783d1eca4288602a713d1c45ec517d937acb9d93120f0cab", size = 52410434 }, + { url = "https://files.pythonhosted.org/packages/81/b3/e7abec2619ecd9d1c743adfe79fa69cf84530f530969daf3dc804efef65b/hf_xet-1.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:5ebd79db87df0b9d3607e7c9a6bb0662c10e36992f522f66b1d2a7fe93f53f27", size = 53465113 }, + { url = "https://files.pythonhosted.org/packages/df/82/b51f3b6e5c6f33e91220c37b17760229704c58e79ab0fcfd0fd3b55803d3/hf_xet-1.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8e6d2625971b4affad634835db82d5392f38de874205a9573e0dd3f0f9cb136f", size = 53461632 }, + { url = "https://files.pythonhosted.org/packages/95/d2/32defba26d995f7acdc4fe3e5911473b25aff5b75c5a2532786435a709e8/hf_xet-1.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:b446964bd75eb7f6b4d983c47241b2023eadfad1f56137ed00e1ca6fc278faec", size = 4121808 }, +] + [[package]] name = "huggingface-hub" -version = "0.29.1" +version = "0.30.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -720,9 +735,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/37/797d6476f13e5ef6af5fc48a5d641d32b39c37e166ccf40c3714c5854a85/huggingface_hub-0.29.1.tar.gz", hash = "sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250", size = 389776 } +sdist = { url = "https://files.pythonhosted.org/packages/78/be/049689a7197630e75c4bb53021cb209a56617c9bf39b3a0950650d1f96e1/huggingface_hub-0.30.1.tar.gz", hash = "sha256:f379e8b8d0791295602538856638460ae3cf679c7f304201eb80fb98c771950e", size = 400784 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/05/75b90de9093de0aadafc868bb2fa7c57651fd8f45384adf39bd77f63980d/huggingface_hub-0.29.1-py3-none-any.whl", hash = "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5", size = 468049 }, + { url = "https://files.pythonhosted.org/packages/99/e3/2232d0e726d4d6ea69643b9593d97d0e7e6ea69c2fe9ed5de34d476c1c47/huggingface_hub-0.30.1-py3-none-any.whl", hash = "sha256:0f6aa5ec5a4e68e5b9e45d556b4e5ea180c58f5a5ffa734e7f38c9d573028959", size = 481170 }, ] [[package]] @@ -2563,6 +2578,7 @@ dependencies = [ { name = "grpcio-reflection" }, { name = "grpcio-status" }, { name = "hf-transfer" }, + { name = "hf-xet" }, { name = "huggingface-hub" }, { name = "kernels" }, { name = "loguru" }, @@ -2628,7 +2644,8 @@ requires-dist = [ { name = "grpcio-tools", marker = "extra == 'dev'", specifier = ">=1.51.1,<2.0" }, { name = "grpcio-tools", marker = "extra == 'gen'", specifier = ">=1.69.0" }, { name = "hf-transfer", specifier = ">=0.1.8" }, - { name = "huggingface-hub", specifier = ">=0.29.0" }, + { name = "hf-xet", specifier = ">=1.0.0" }, + { name = "huggingface-hub", specifier = ">=0.30.1" }, { name = "kernels", specifier = ">=0.2.1" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "mypy-protobuf", marker = "extra == 'gen'", specifier = ">=3.6.0" },