text-generation-inference/integration-tests/models/test_flash_qwen2_vl_video.py

import pytest
import json
import requests


@pytest.fixture(scope="module")
def qwen2_vl_handle(launcher):
    """Launch Qwen/Qwen2-VL-7B-Instruct and yield the resulting handle.

    The fixture is module-scoped, so the ``launcher`` context manager is
    entered once for this module and exited when the fixture is torn down.
    """
    # Keyword options forwarded verbatim to ``launcher``.
    launch_options = {
        "max_input_length": 10_000,
        "max_batch_prefill_tokens": 10_000,
        "max_total_tokens": 10_001,
        "cuda_graphs": [0],
    }
    with launcher("Qwen/Qwen2-VL-7B-Instruct", **launch_options) as server_handle:
        yield server_handle


@pytest.fixture(scope="module")
async def qwen2_vl(qwen2_vl_handle):
    """Await the handle's health check, then return the handle's client."""
    # NOTE(review): 300 is passed straight to ``health``; presumably a timeout
    # in seconds -- confirm against the launcher handle implementation.
    await qwen2_vl_handle.health(300)
    client = qwen2_vl_handle.client
    return client


@pytest.mark.asyncio
async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
    """Stream a chat completion for a video prompt and snapshot the last event.

    Sends one user message holding a ``video_url`` part and a text part to
    ``/v1/chat/completions`` with ``"stream": True``, reads the reply as a
    server-sent event stream, and compares the last JSON event received
    before the ``[DONE]`` sentinel with ``response_snapshot``.

    NOTE(review): events are now parsed line by line instead of from fixed
    1024-byte chunks. If the stored snapshot was recorded while the old
    parsing dropped a split final event, it may need to be regenerated.
    """
    payload = {
        "model": "tgi",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "video_url",
                        "video_url": {
                            "url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/360/Big_Buck_Bunny_360_10s_1MB.mp4"
                        },
                    },
                    {
                        "type": "text",
                        "text": "Describe this video.",
                    },
                ],
            },
        ],
        "seed": 42,
        "max_tokens": 100,
        "stream": True,
    }

    last_response = None
    # ``stream=True`` makes requests hand over the body incrementally rather
    # than buffering it first; the ``with`` block releases the connection.
    with requests.post(
        f"{qwen2_vl.base_url}/v1/chat/completions",
        headers=qwen2_vl.headers,
        json=payload,
        stream=True,
    ) as responses:
        # Fail on an HTTP error here rather than on a confusing snapshot diff.
        responses.raise_for_status()

        # Read whole lines: a fixed-size byte chunk can cut an event (or a
        # multi-byte character) in half, and the halves used to be silently
        # discarded by a swallowed JSONDecodeError.
        for raw_line in responses.iter_lines():
            line = raw_line.decode("utf-8")
            # Skip blank event separators and any non-data SSE fields.
            if not line.startswith("data:"):
                continue
            # Strip only the leading field name so that a literal "data: "
            # inside the generated text is left intact.
            data = line[len("data:"):].strip()
            if data == "[DONE]":
                # End-of-stream sentinel: stop reading entirely.
                break
            # A complete data line must be valid JSON; let a malformed one
            # fail the test instead of being skipped.
            last_response = json.loads(data)

    assert last_response is not None, "stream contained no JSON events"
    assert last_response == response_snapshot
fix: resolve rebase issues and add test 2024-12-12 18:31:33 +00:00			`import pytest`
			`import json`
			`import requests`


			`@pytest.fixture(scope="module")`
			`def qwen2_vl_handle(launcher):`
			`with launcher(`
			`"Qwen/Qwen2-VL-7B-Instruct",`
			`max_input_length=10_000,`
			`max_batch_prefill_tokens=10_000,`
			`max_total_tokens=10_001,`
			`cuda_graphs=[0],`
			`) as handle:`
			`yield handle`


			`@pytest.fixture(scope="module")`
			`async def qwen2_vl(qwen2_vl_handle):`
			`await qwen2_vl_handle.health(300)`
			`return qwen2_vl_handle.client`


			`@pytest.mark.asyncio`
			`async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):`
			`responses = requests.post(`
			`f"{qwen2_vl.base_url}/v1/chat/completions",`
			`headers=qwen2_vl.headers,`
			`json={`
			`"model": "tgi",`
			`"messages": [`
			`{`
			`"role": "user",`
			`"content": [`
			`{`
			`"type": "video_url",`
			`"video_url": {`
			`"url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/360/Big_Buck_Bunny_360_10s_1MB.mp4"`
			`},`
			`},`
			`{`
			`"type": "text",`
			`"text": "Describe this video.",`
			`},`
			`],`
			`},`
			`],`
			`"seed": 42,`
			`"max_tokens": 100,`
			`"stream": True,`
			`},`
			`)`

			`# iterate over the response in chunks`
			`count = 0`
			`full_text = ""`
			`last_response = None`
			`for chunk in responses.iter_content(chunk_size=1024):`
			`if chunk:`
			`count += 1`
			`# remove the "data: " prefix, trailing newline, and split the chunk into individual lines`
			`lines = chunk.decode("utf-8").replace("data: ", "").rstrip("\n").split("\n")`
			`for line in lines:`
			`if line == "[DONE]":`
			`break`
			`print("=", line)`
			`try:`
			`response = json.loads(line)`
			`# print(response)`
			`last_response = response`
			`full_text += response["choices"][0]["delta"]["content"]`
			`except json.JSONDecodeError:`
			`pass`
			`# assert count == 27`
			`# assert response.usage == {`
			`# "completion_tokens": 10,`
			`# "prompt_tokens": 50,`
			`# "total_tokens": 60,`
			`# }`
			`# assert (`
			`# response.choices[0].message.content`
			`# == "In a bustling city, a chicken named Cluck"`
			`# )`
			`assert last_response == response_snapshot`