text-generation-inference/integration-tests/models/test_flash_qwen2_vl_warmup.py

import pytest


@pytest.fixture(scope="module")
def flash_qwen2_vl_handle(launcher):
    # Launch the server with very tight token limits to exercise the warmup path.
    with launcher(
        "Qwen/Qwen2-VL-2B-Instruct",
        max_input_length=40,
        max_batch_prefill_tokens=50,
        max_total_tokens=51,
    ) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_qwen2(flash_qwen2_vl_handle):
    # Wait up to 300 seconds for the server to report healthy before returning a client.
    await flash_qwen2_vl_handle.health(300)
    return flash_qwen2_vl_handle.client


@pytest.mark.private
async def test_flash_qwen2_vl_simple(flash_qwen2, response_snapshot):
    # Send a simple text-only chat request and compare the result to the stored snapshot.
    response = await flash_qwen2.chat(
        max_tokens=20,
        seed=42,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is the color of the sky?"},
                ],
            },
        ],
    )

    assert response.choices[0].message.content == "The correct answer is: blue"
    assert response == response_snapshot