diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2_vl_video/test_qwen2_vl_simpl.json b/integration-tests/models/__snapshots__/test_flash_qwen2_vl_video/test_qwen2_vl_simpl.json
new file mode 100644
index 00000000..612edb07
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2_vl_video/test_qwen2_vl_simpl.json
@@ -0,0 +1,19 @@
+{
+  "choices": [
+    {
+      "delta": {
+        "content": "",
+        "role": "assistant"
+      },
+      "finish_reason": "stop",
+      "index": 0,
+      "logprobs": null
+    }
+  ],
+  "created": 1733450914,
+  "id": "",
+  "model": "Qwen/Qwen2-VL-7B-Instruct",
+  "object": "chat.completion.chunk",
+  "system_fingerprint": "2.4.2-dev0-native",
+  "usage": null
+}
diff --git a/integration-tests/models/test_flash_qwen2_vl_video.py b/integration-tests/models/test_flash_qwen2_vl_video.py
new file mode 100644
index 00000000..b5b7dc01
--- /dev/null
+++ b/integration-tests/models/test_flash_qwen2_vl_video.py
@@ -0,0 +1,71 @@
+import pytest
+import json
+import requests
+
+
+@pytest.fixture(scope="module")
+def qwen2_vl_handle(launcher):
+    with launcher(
+        "Qwen/Qwen2-VL-7B-Instruct",
+        max_input_length=10_000,
+        max_batch_prefill_tokens=10_000,
+        max_total_tokens=10_001,
+        cuda_graphs=[0],
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def qwen2_vl(qwen2_vl_handle):
+    await qwen2_vl_handle.health(300)
+    return qwen2_vl_handle.client
+
+
+@pytest.mark.asyncio
+async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
+    # Stream a chat completion for a video prompt and snapshot the final chunk.
+    responses = requests.post(
+        f"{qwen2_vl.base_url}/v1/chat/completions",
+        headers=qwen2_vl.headers,
+        json={
+            "model": "tgi",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "video_url",
+                            "video_url": {
+                                "url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/360/Big_Buck_Bunny_360_10s_1MB.mp4"
+                            },
+                        },
+                        {
+                            "type": "text",
+                            "text": "Describe this video.",
+                        },
+                    ],
+                },
+            ],
+            "seed": 42,
+            "max_tokens": 100,
+            "stream": True,
+        },
+        stream=True,
+    )
+
+    # Read complete SSE lines instead of fixed-size byte chunks:
+    # iter_content(chunk_size=1024) can split a JSON payload across chunk
+    # boundaries, making json.loads fail and silently dropping events.
+    last_response = None
+    for raw_line in responses.iter_lines():
+        if not raw_line:
+            continue
+        line = raw_line.decode("utf-8")
+        if line.startswith("data: "):
+            line = line[len("data: "):]
+        if line.strip() == "[DONE]":
+            break
+        last_response = json.loads(line)
+
+    # The final streamed chunk (finish_reason == "stop") must match the snapshot.
+    assert last_response == response_snapshot
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 17808b3c..461f43cb 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -21,6 +21,11 @@ use tokio::sync::mpsc;
 use tokio::sync::oneshot;
 use tracing::{instrument, Span};
 use {once_cell::sync::Lazy, regex::Regex};
+// video processing
+use ffmpeg_next::format::Pixel;
+use ffmpeg_next::media::Type;
+use ffmpeg_next::software::scaling::{context::Context, flag::Flags};
+use std::io::Write;
 
 static DEFAULT_GENERATION_LENGTH: u32 = 1024;
 
@@ -536,7 +541,11 @@ fn format_to_mimetype(format: ImageFormat) -> String {
         .to_string()
 }
 
-pub fn fetch_video(input: &str) -> Result<(Vec<u8>, String, usize, usize, f32), ValidationError> {
+pub fn fetch_video(
+    input: &str,
+    target_width: u32,
+    target_height: u32,
+) -> Result {
+    let (data, mimetype) = if input.starts_with("