mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
* feat: refactor model, improve startup and re-enable tests * fix: improve multimodal rotary embed caching * fix: limit vision flop calc to qwen2 vl models and update config typing * fix: include clippy lint * feat: refactor position ids in warmup and bump tests * fix: prefer default dtype * fix: enable all cuda graphs and bump snapshots * fix: adjust rotary init path * fix: simplify get position ids and remove unused vision config * fix: update position ids so first dim is batch, simplify rotary and bump vlm default token limit * fix: improve position id init during cuda warmup for mrope and simplify rotary forward * fix: check existence before accessing rope type in cuda warmup * fix: check key before access * fix: improve mrope check in cuda graph warmup * fix: remove check for default rope type * fix: add more tests and improve model generation * fix: improve and simplify get_cos_sin, refactors and cleanup get_position_ids * fix: adjust signatures with types
123 lines
4.3 KiB
Python
123 lines
4.3 KiB
Python
import pytest
|
|
|
|
|
|
@pytest.fixture(scope="module")
def flash_qwen2_vl_handle(launcher):
    """Yield a handle for a launched Qwen2-VL-7B-Instruct instance.

    The handle comes from the ``launcher`` fixture used as a context
    manager, so it is released when the module-scoped fixture is torn down.
    """
    model_id = "Qwen/Qwen2-VL-7B-Instruct"
    with launcher(model_id) as server:
        yield server
|
|
|
|
|
|
@pytest.fixture(scope="module")
async def flash_qwen2(flash_qwen2_vl_handle):
    """Return the client of the launched handle once its health check passes.

    NOTE(review): ``300`` is presumably a timeout for the health check --
    confirm against the handle's ``health`` implementation.
    """
    server = flash_qwen2_vl_handle
    await server.health(300)
    return server.client
|
|
|
|
|
|
@pytest.mark.private
async def test_flash_qwen2_vl_simple(flash_qwen2, response_snapshot):
    """Check a non-streaming chat completion for the rabbit image.

    Sends one user message containing an image URL and a text prompt with a
    fixed seed, then asserts both the exact generated text and equality of
    the full response with the stored snapshot.
    """
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
        },
    }
    text_part = {"type": "text", "text": "Describe this image."}
    expected_text = "The image depicts an anthropomorphic rabbit, wearing a spacesuit, standing in a barren, rocky landscape that resembles the surface of another planet, possibly Mars. The rabbit has a red digestive system label on its chest, and the surrounding environment features red sandy terrain and a hazy, floating planet or moon in the background. The scene has a surreal, fantastical quality, blending elements of science fiction and space exploration with a whimsical character."

    response = await flash_qwen2.chat(
        seed=42,
        messages=[{"role": "user", "content": [image_part, text_part]}],
    )

    assert response.choices[0].message.content == expected_text
    assert response == response_snapshot
|
|
|
|
|
|
@pytest.mark.private
async def test_flash_qwen2_vl_simple_streaming(flash_qwen2, response_snapshot):
    """Check the streaming chat completion for the rabbit image.

    Issues the same request as the non-streaming test with ``stream=True``,
    concatenates the ``delta.content`` of every streamed chunk, and asserts
    the assembled text, the number of chunks (89), and equality of the last
    chunk with the stored snapshot.
    """
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
        },
    }
    text_part = {"type": "text", "text": "Describe this image."}
    expected_text = "The image depicts an anthropomorphic rabbit, wearing a spacesuit, standing in a barren, rocky landscape that resembles the surface of another planet, possibly Mars. The rabbit has a red digestive system label on its chest, and the surrounding environment features red sandy terrain and a hazy, floating planet or moon in the background. The scene has a surreal, fantastical quality, blending elements of science fiction and space exploration with a whimsical character."

    responses = await flash_qwen2.chat(
        seed=42,
        messages=[{"role": "user", "content": [image_part, text_part]}],
        stream=True,
    )

    # Collect one text piece per chunk; the chunk count is the list length.
    pieces = []
    last_response = None
    async for chunk in responses:
        pieces.append(chunk.choices[0].delta.content)
        last_response = chunk

    generated = "".join(pieces)

    assert generated == expected_text
    assert len(pieces) == 89
    assert last_response == response_snapshot
|
|
|
|
|
|
@pytest.mark.private
async def test_flash_qwen2_vl_bay(flash_qwen2, response_snapshot):
    """Check the chat completion for the Statue of Liberty image.

    Sends one user message containing the image URL and a text prompt with
    a fixed seed and asserts the response equals the stored snapshot.
    """
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
        },
    }
    text_part = {"type": "text", "text": "Describe the image"}

    response = await flash_qwen2.chat(
        seed=42,
        messages=[{"role": "user", "content": [image_part, text_part]}],
    )

    assert response == response_snapshot
|
|
|
|
|
|
@pytest.mark.private
async def test_flash_qwen2_vl_inpaint(flash_qwen2, response_snapshot):
    """Check the chat completion for the autopipeline-inpaint image.

    Sends one user message containing the image URL and a text prompt with
    a fixed seed and asserts the response equals the stored snapshot.
    """
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-inpaint.png"
        },
    }
    text_part = {"type": "text", "text": "Describe the image"}

    response = await flash_qwen2.chat(
        seed=42,
        messages=[{"role": "user", "content": [image_part, text_part]}],
    )

    assert response == response_snapshot
|