mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-19 13:52:07 +00:00
* feat: tokenize each request individually and increase warmup image size
* feat: adjust rotary embed and avoid cuda graphs of size 2 and smaller
* fix: address image resize and rebase changes
* feat: update to run qwen2-vl tests
* fix: tweak param types
39 lines
957 B
Python
import pytest


@pytest.fixture(scope="module")
def flash_qwen2_vl_handle(launcher):
    # Launch a text-generation-inference server for Qwen2-VL with small
    # token limits so the integration test stays lightweight.
    with launcher(
        "Qwen/Qwen2-VL-2B-Instruct",
        max_input_length=40,
        max_batch_prefill_tokens=50,
        max_total_tokens=51,
    ) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_qwen2(flash_qwen2_vl_handle):
    # Wait up to 300 seconds for the server to report healthy, then expose its client.
    await flash_qwen2_vl_handle.health(300)
    return flash_qwen2_vl_handle.client


@pytest.mark.private
async def test_flash_qwen2_vl_simple(flash_qwen2, response_snapshot):
    # Send a single text-only chat request with a fixed seed and compare the
    # result against the stored response snapshot.
    response = await flash_qwen2.chat(
        max_tokens=20,
        seed=42,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is the color of the sky?"},
                ],
            },
        ],
    )

    assert response.choices[0].message.content == "The correct answer is: blue"

    assert response == response_snapshot
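
As a usage note, the sketch below shows one way this test module could be invoked directly with pytest; the file path and command-line flags are assumptions for illustration, and it presumes the surrounding integration-test suite provides the launcher and response_snapshot fixtures as well as async test support.

    # run_qwen2_vl_test.py -- illustrative sketch only; path and flags are assumptions.
    import sys

    import pytest

    if __name__ == "__main__":
        # -s streams the launched server's output, -v prints each test name.
        sys.exit(pytest.main(["-s", "-v", "integration-tests/models/test_flash_qwen2_vl.py"]))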