mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-28 21:42:06 +00:00

fix: small refactor and cleanups

This commit is contained in: parent dcc1194198, commit b27749eba7
@@ -71,14 +71,5 @@ async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
             full_text += response["choices"][0]["delta"]["content"]
         except json.JSONDecodeError:
             pass
-    # assert count == 27
-    # assert response.usage == {
-    # "completion_tokens": 10,
-    # "prompt_tokens": 50,
-    # "total_tokens": 60,
-    # }
-    # assert (
-    # response.choices[0].message.content
-    # == "In a bustling city, a chicken named Cluck"
-    # )
     assert last_response == response_snapshot
@@ -21,6 +21,7 @@ itertools = "0.10"
 jsonschema = { version = "0.17.1", features = ["draft202012"] }
 metrics = { workspace = true }
 metrics-exporter-prometheus = { workspace = true }
+mp4parse = { version = "0.17.0", optional = true }
 nohash-hasher = "0.2.0"
 opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
 outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
@@ -29,7 +30,7 @@ rand = "0.8.5"
 reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"
 serde_json = "1.0.107"
-tempfile = "3.10.1"
+tempfile = { version = "3.10.1", optional = true }
 thiserror = "1.0.48"
 tokenizers = { workspace = true }
 tokio = { version = "1.32.0", features = [
@@ -66,7 +67,6 @@ uuid = { version = "1.9.1", default-features = false, features = [
 csv = "1.3.0"
 ureq = "=2.9"
 pyo3 = { workspace = true }
-mp4parse = "0.17.0"
 
 
 [build-dependencies]
@@ -77,4 +77,4 @@ default = ["ngrok"]
 ngrok = ["dep:ngrok"]
 google = []
 kserve = []
-video = ["ffmpeg-next"]
+video = ["ffmpeg-next", "mp4parse", "tempfile"]
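With mp4parse and tempfile now optional, they (like ffmpeg-next) are only compiled when the `video` feature is enabled, so any code that touches them must sit behind the same flag. A minimal sketch of that pattern, assuming a hypothetical helper that spills an uploaded video to a temporary file; the function name, its fallback behaviour, and the use of tempfile here are illustrative, not taken from this diff:

    // Hypothetical sketch: the optional crates only exist when the crate is
    // built with `--features video`, so their use is cfg-gated the same way.
    #[cfg(feature = "video")]
    fn write_video_to_disk(bytes: &[u8]) -> std::io::Result<std::path::PathBuf> {
        use std::io::Write;

        // `tempfile` is only compiled in for video-enabled builds.
        let mut file = tempfile::NamedTempFile::new()?;
        file.write_all(bytes)?;
        // Keep the file on disk so the returned path stays valid (illustrative choice).
        file.keep()
            .map(|(_file, path)| path)
            .map_err(|e| e.error)
    }

    // A stub with the same signature keeps callers compiling when the feature is off.
    #[cfg(not(feature = "video"))]
    fn write_video_to_disk(_bytes: &[u8]) -> std::io::Result<std::path::PathBuf> {
        Err(std::io::Error::new(
            std::io::ErrorKind::Unsupported,
            "this build does not include the `video` feature",
        ))
    }

Since `default = ["ngrok"]`, a plain build pulls in none of the video crates; building with `cargo build --features video` enables ffmpeg-next, mp4parse, and tempfile together.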
@@ -21,7 +21,6 @@ use tokio::sync::mpsc;
 use tokio::sync::oneshot;
 use tracing::{instrument, Span};
 use {once_cell::sync::Lazy, regex::Regex};
-// video processing
 
 #[cfg(feature = "video")]
 use ffmpeg_next::{
@@ -772,7 +771,6 @@ fn video_tokens(config: &Config, height: u32, width: u32, sampled_frames: f32) -
     use Config::*;
 
     match config {
-        // TOOD: improve to use the config to better estimate the number of tokens
         Qwen2Vl(_config) => {
             let min_frames = 2_f32;
             let max_frames = 256_f32;
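The removed TODO acknowledged that the token count here is a rough estimate. A minimal sketch of the kind of per-arm estimate this function performs, reusing the frame clamp visible in the diff; the 28-pixel patch size and the pairing of frames are assumptions based on Qwen2-VL conventions, not values read from this repository:

    // Illustrative sketch only, not the repository's exact arithmetic.
    fn estimate_qwen2_vl_video_tokens(height: u32, width: u32, sampled_frames: f32) -> usize {
        let min_frames = 2_f32;
        let max_frames = 256_f32;
        // Clamp the sampled frame count into the supported range.
        let frames = sampled_frames.clamp(min_frames, max_frames);

        // Assumed geometry: 28x28 pixel patches per frame, frames merged in pairs.
        let patches_h = (height as f32 / 28.0).ceil();
        let patches_w = (width as f32 / 28.0).ceil();
        let temporal_groups = (frames / 2.0).ceil();

        (temporal_groups * patches_h * patches_w) as usize
    }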
@@ -80,11 +80,6 @@ def image_text_replacement(processor, image_input, config, image_id: int) -> str
 
 def video_text_replacement(processor, video_input, config) -> str:
     if config.model_type == "qwen2_vl":
-        # num_pads = video_input['pixel_values'].size(0)
-        # num_pads = 1206
-
-        # import ipdb; ipdb.set_trace()
-        # num_pads = 9556 + 10
         num_pads = video_input.pixel_values.shape[0] // 4
         padding = "<|video_pad|>" * num_pads
         return f"<|vision_start|>{padding}<|vision_end|>"
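For comparison, the placeholder construction kept by this hunk, written as a small Rust helper: one <|video_pad|> token per four rows of pixel_values, wrapped in the vision markers. The function is hypothetical and only mirrors the Python lines above; it is not part of this diff:

    // Hypothetical helper mirroring the Python snippet above.
    fn qwen2_vl_video_placeholder(pixel_values_rows: usize) -> String {
        // One pad token per four rows of the flattened pixel_values tensor.
        let num_pads = pixel_values_rows / 4;
        let padding = "<|video_pad|>".repeat(num_pads);
        format!("<|vision_start|>{padding}<|vision_end|>")
    }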