mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 08:22:07 +00:00
85 lines
2.5 KiB
Python
85 lines
2.5 KiB
Python
|
import pytest
|
||
|
import json
|
||
|
import requests
|
||
|
|
||
|
|
||
|
@pytest.fixture(scope="module")
def qwen2_vl_handle(launcher):
    """Yield a launcher handle for ``Qwen/Qwen2-VL-7B-Instruct``.

    ``launcher`` is used as a context manager, so whatever it started is
    released when the module-scoped fixture is torn down.
    """
    # Launch options forwarded verbatim to ``launcher``.
    # NOTE(review): the token limits are presumably sized for the video
    # prompt used by the tests in this module -- confirm.
    launch_options = {
        "max_input_length": 10_000,
        "max_batch_prefill_tokens": 10_000,
        "max_total_tokens": 10_001,
        "cuda_graphs": [0],
    }
    with launcher("Qwen/Qwen2-VL-7B-Instruct", **launch_options) as handle:
        yield handle
|
||
|
|
||
|
|
||
|
@pytest.fixture(scope="module")
async def qwen2_vl(qwen2_vl_handle):
    """Return the client of the launched handle once it reports healthy.

    Awaits ``qwen2_vl_handle.health(300)`` before handing out
    ``qwen2_vl_handle.client``.
    """
    handle = qwen2_vl_handle
    # NOTE(review): 300 is presumably a timeout in seconds -- confirm against
    # the handle's ``health`` implementation.
    await handle.health(300)
    client = handle.client
    return client
|
||
|
|
||
|
|
||
|
@pytest.mark.asyncio
async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
    """Stream a chat completion for a video prompt and snapshot the last event.

    Posts a ``/v1/chat/completions`` request whose user message contains a
    ``video_url`` part and a text part, reads the streamed response line by
    line, and compares the last successfully decoded JSON event with
    ``response_snapshot``.

    Args:
        qwen2_vl: Client fixture; only ``base_url`` and ``headers`` are read.
        response_snapshot: Snapshot fixture the final event is compared to.
    """
    payload = {
        "model": "tgi",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "video_url",
                        "video_url": {
                            "url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/360/Big_Buck_Bunny_360_10s_1MB.mp4"
                        },
                    },
                    {
                        "type": "text",
                        "text": "Describe this video.",
                    },
                ],
            },
        ],
        "seed": 42,
        "max_tokens": 100,
        "stream": True,
    }

    event_prefix = "data:"
    full_text = ""
    last_response = None

    # stream=True lets events be consumed as they arrive instead of after the
    # whole body has been buffered; the context manager releases the
    # connection even if an assertion or parse error is raised.
    with requests.post(
        f"{qwen2_vl.base_url}/v1/chat/completions",
        headers=qwen2_vl.headers,
        json=payload,
        stream=True,
    ) as responses:
        # Fail with the HTTP error rather than an unrelated KeyError or a
        # snapshot mismatch further down.
        responses.raise_for_status()

        # iter_lines() reassembles lines across network chunks. Parsing fixed
        # 1024-byte chunks independently would drop any event that straddles
        # a chunk boundary, because the resulting JSONDecodeError is ignored.
        for raw_line in responses.iter_lines():
            if not raw_line:
                # Blank lines only separate events.
                continue

            line = raw_line.decode("utf-8")
            # Strip the prefix from the start of the line only, so the same
            # characters appearing inside generated text are left untouched.
            if line.startswith(event_prefix):
                line = line[len(event_prefix):].strip()

            if line == "[DONE]":
                break

            try:
                response = json.loads(line)
            except json.JSONDecodeError:
                # Best effort: skip lines that are not JSON events.
                continue

            last_response = response
            # Indexing also checks that every event carries a content delta.
            full_text += response["choices"][0]["delta"]["content"]

    # Never compare (or record) ``None`` against the snapshot.
    assert last_response is not None, "no JSON events were received from the stream"
    assert last_response == response_snapshot, f"streamed text was: {full_text!r}"
|