fix: small refactor and cleanups

This commit is contained in:
drbh 2025-01-03 11:01:07 -05:00
parent dcc1194198
commit b27749eba7
4 changed files with 4 additions and 20 deletions

View File

@ -71,14 +71,5 @@ async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
full_text += response["choices"][0]["delta"]["content"]
except json.JSONDecodeError:
pass
# assert count == 27
# assert response.usage == {
# "completion_tokens": 10,
# "prompt_tokens": 50,
# "total_tokens": 60,
# }
# assert (
# response.choices[0].message.content
# == "In a bustling city, a chicken named Cluck"
# )
assert last_response == response_snapshot

View File

@ -21,6 +21,7 @@ itertools = "0.10"
jsonschema = { version = "0.17.1", features = ["draft202012"] }
metrics = { workspace = true }
metrics-exporter-prometheus = { workspace = true }
mp4parse = { version = "0.17.0", optional = true }
nohash-hasher = "0.2.0"
opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
@ -29,7 +30,7 @@ rand = "0.8.5"
reqwest = { version = "0.11.20", features = [] }
serde = "1.0.188"
serde_json = "1.0.107"
tempfile = "3.10.1"
tempfile = { version = "3.10.1", optional = true }
thiserror = "1.0.48"
tokenizers = { workspace = true }
tokio = { version = "1.32.0", features = [
@ -66,7 +67,6 @@ uuid = { version = "1.9.1", default-features = false, features = [
csv = "1.3.0"
ureq = "=2.9"
pyo3 = { workspace = true }
mp4parse = "0.17.0"
[build-dependencies]
@ -77,4 +77,4 @@ default = ["ngrok"]
ngrok = ["dep:ngrok"]
google = []
kserve = []
video = ["ffmpeg-next"]
video = ["ffmpeg-next", "mp4parse", "tempfile"]

View File

@ -21,7 +21,6 @@ use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tracing::{instrument, Span};
use {once_cell::sync::Lazy, regex::Regex};
// video processing
#[cfg(feature = "video")]
use ffmpeg_next::{
@ -772,7 +771,6 @@ fn video_tokens(config: &Config, height: u32, width: u32, sampled_frames: f32) -
use Config::*;
match config {
// TODO: improve this to use the config to better estimate the number of tokens
Qwen2Vl(_config) => {
let min_frames = 2_f32;
let max_frames = 256_f32;

View File

@ -80,11 +80,6 @@ def image_text_replacement(processor, image_input, config, image_id: int) -> str
def video_text_replacement(processor, video_input, config) -> str:
if config.model_type == "qwen2_vl":
# num_pads = video_input['pixel_values'].size(0)
# num_pads = 1206
# import ipdb; ipdb.set_trace()
# num_pads = 9556 + 10
num_pads = video_input.pixel_values.shape[0] // 4
padding = "<|video_pad|>" * num_pads
return f"<|vision_start|>{padding}<|vision_end|>"