mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-25 20:12:07 +00:00
fix: small refactor and cleanups
This commit is contained in:
parent
dcc1194198
commit
b27749eba7
@ -71,14 +71,5 @@ async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
|
||||
full_text += response["choices"][0]["delta"]["content"]
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# assert count == 27
|
||||
# assert response.usage == {
|
||||
# "completion_tokens": 10,
|
||||
# "prompt_tokens": 50,
|
||||
# "total_tokens": 60,
|
||||
# }
|
||||
# assert (
|
||||
# response.choices[0].message.content
|
||||
# == "In a bustling city, a chicken named Cluck"
|
||||
# )
|
||||
|
||||
assert last_response == response_snapshot
|
||||
|
@ -21,6 +21,7 @@ itertools = "0.10"
|
||||
jsonschema = { version = "0.17.1", features = ["draft202012"] }
|
||||
metrics = { workspace = true }
|
||||
metrics-exporter-prometheus = { workspace = true }
|
||||
mp4parse = { version = "0.17.0", optional = true }
|
||||
nohash-hasher = "0.2.0"
|
||||
opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
|
||||
outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
|
||||
@ -29,7 +30,7 @@ rand = "0.8.5"
|
||||
reqwest = { version = "0.11.20", features = [] }
|
||||
serde = "1.0.188"
|
||||
serde_json = "1.0.107"
|
||||
tempfile = "3.10.1"
|
||||
tempfile = { version = "3.10.1", optional = true }
|
||||
thiserror = "1.0.48"
|
||||
tokenizers = { workspace = true }
|
||||
tokio = { version = "1.32.0", features = [
|
||||
@ -66,7 +67,6 @@ uuid = { version = "1.9.1", default-features = false, features = [
|
||||
csv = "1.3.0"
|
||||
ureq = "=2.9"
|
||||
pyo3 = { workspace = true }
|
||||
mp4parse = "0.17.0"
|
||||
|
||||
|
||||
[build-dependencies]
|
||||
@ -77,4 +77,4 @@ default = ["ngrok"]
|
||||
ngrok = ["dep:ngrok"]
|
||||
google = []
|
||||
kserve = []
|
||||
video = ["ffmpeg-next"]
|
||||
video = ["ffmpeg-next", "mp4parse", "tempfile"]
|
||||
|
@ -21,7 +21,6 @@ use tokio::sync::mpsc;
|
||||
use tokio::sync::oneshot;
|
||||
use tracing::{instrument, Span};
|
||||
use {once_cell::sync::Lazy, regex::Regex};
|
||||
// video processing
|
||||
|
||||
#[cfg(feature = "video")]
|
||||
use ffmpeg_next::{
|
||||
@ -772,7 +771,6 @@ fn video_tokens(config: &Config, height: u32, width: u32, sampled_frames: f32) -
|
||||
use Config::*;
|
||||
|
||||
match config {
|
||||
// TODO: improve to use the config to better estimate the number of tokens
|
||||
Qwen2Vl(_config) => {
|
||||
let min_frames = 2_f32;
|
||||
let max_frames = 256_f32;
|
||||
|
@ -80,11 +80,6 @@ def image_text_replacement(processor, image_input, config, image_id: int) -> str
|
||||
|
||||
def video_text_replacement(processor, video_input, config) -> str:
|
||||
if config.model_type == "qwen2_vl":
|
||||
# num_pads = video_input['pixel_values'].size(0)
|
||||
# num_pads = 1206
|
||||
|
||||
# import ipdb; ipdb.set_trace()
|
||||
# num_pads = 9556 + 10
|
||||
num_pads = video_input.pixel_values.shape[0] // 4
|
||||
padding = "<|video_pad|>" * num_pads
|
||||
return f"<|vision_start|>{padding}<|vision_end|>"
|
||||
|
Loading…
Reference in New Issue
Block a user