mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-28 21:42:06 +00:00

fix: small refactor and cleanups

This commit is contained in: parent dcc1194198, commit b27749eba7
@@ -71,14 +71,5 @@ async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
             full_text += response["choices"][0]["delta"]["content"]
         except json.JSONDecodeError:
             pass
-    # assert count == 27
-    # assert response.usage == {
-    # "completion_tokens": 10,
-    # "prompt_tokens": 50,
-    # "total_tokens": 60,
-    # }
-    # assert (
-    # response.choices[0].message.content
-    # == "In a bustling city, a chicken named Cluck"
-    # )
     assert last_response == response_snapshot
@@ -21,6 +21,7 @@ itertools = "0.10"
 jsonschema = { version = "0.17.1", features = ["draft202012"] }
 metrics = { workspace = true }
 metrics-exporter-prometheus = { workspace = true }
+mp4parse = { version = "0.17.0", optional = true }
 nohash-hasher = "0.2.0"
 opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
 outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
@@ -29,7 +30,7 @@ rand = "0.8.5"
 reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"
 serde_json = "1.0.107"
-tempfile = "3.10.1"
+tempfile = { version = "3.10.1", optional = true }
 thiserror = "1.0.48"
 tokenizers = { workspace = true }
 tokio = { version = "1.32.0", features = [
@@ -66,7 +67,6 @@ uuid = { version = "1.9.1", default-features = false, features = [
 csv = "1.3.0"
 ureq = "=2.9"
 pyo3 = { workspace = true }
-mp4parse = "0.17.0"
 
 
 [build-dependencies]
@@ -77,4 +77,4 @@ default = ["ngrok"]
 ngrok = ["dep:ngrok"]
 google = []
 kserve = []
-video = ["ffmpeg-next"]
+video = ["ffmpeg-next", "mp4parse", "tempfile"]
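With mp4parse and tempfile now optional, they (like ffmpeg-next) are only compiled when the `video` feature is enabled, so any code that touches them must sit behind the same flag. A minimal sketch of that pattern, assuming a hypothetical helper that spills an uploaded video to a temporary file; the function name, its fallback behaviour, and the use of tempfile here are illustrative, not taken from this diff:

    // Hypothetical sketch: the optional crates only exist when the crate is
    // built with `--features video`, so their use is cfg-gated the same way.
    #[cfg(feature = "video")]
    fn write_video_to_disk(bytes: &[u8]) -> std::io::Result<std::path::PathBuf> {
        use std::io::Write;

        // `tempfile` is only compiled in for video-enabled builds.
        let mut file = tempfile::NamedTempFile::new()?;
        file.write_all(bytes)?;
        // Keep the file on disk so the returned path stays valid (illustrative choice).
        file.keep()
            .map(|(_file, path)| path)
            .map_err(|e| e.error)
    }

    // A stub with the same signature keeps callers compiling when the feature is off.
    #[cfg(not(feature = "video"))]
    fn write_video_to_disk(_bytes: &[u8]) -> std::io::Result<std::path::PathBuf> {
        Err(std::io::Error::new(
            std::io::ErrorKind::Unsupported,
            "this build does not include the `video` feature",
        ))
    }

Since `default = ["ngrok"]`, a plain build pulls in none of the video crates; building with `cargo build --features video` enables ffmpeg-next, mp4parse, and tempfile together.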
@@ -21,7 +21,6 @@ use tokio::sync::mpsc;
 use tokio::sync::oneshot;
 use tracing::{instrument, Span};
 use {once_cell::sync::Lazy, regex::Regex};
-// video processing
 
 #[cfg(feature = "video")]
 use ffmpeg_next::{
@@ -772,7 +771,6 @@ fn video_tokens(config: &Config, height: u32, width: u32, sampled_frames: f32) -
     use Config::*;
 
     match config {
-        // TOOD: improve to use the config to better estimate the number of tokens
         Qwen2Vl(_config) => {
             let min_frames = 2_f32;
             let max_frames = 256_f32;
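The removed TODO acknowledged that the token count here is a rough estimate. A minimal sketch of the kind of per-arm estimate this function performs, reusing the frame clamp visible in the diff; the 28-pixel patch size and the pairing of frames are assumptions based on Qwen2-VL conventions, not values read from this repository:

    // Illustrative sketch only, not the repository's exact arithmetic.
    fn estimate_qwen2_vl_video_tokens(height: u32, width: u32, sampled_frames: f32) -> usize {
        let min_frames = 2_f32;
        let max_frames = 256_f32;
        // Clamp the sampled frame count into the supported range.
        let frames = sampled_frames.clamp(min_frames, max_frames);

        // Assumed geometry: 28x28 pixel patches per frame, frames merged in pairs.
        let patches_h = (height as f32 / 28.0).ceil();
        let patches_w = (width as f32 / 28.0).ceil();
        let temporal_groups = (frames / 2.0).ceil();

        (temporal_groups * patches_h * patches_w) as usize
    }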
@@ -80,11 +80,6 @@ def image_text_replacement(processor, image_input, config, image_id: int) -> str
 
 def video_text_replacement(processor, video_input, config) -> str:
     if config.model_type == "qwen2_vl":
-        # num_pads = video_input['pixel_values'].size(0)
-        # num_pads = 1206
-
-        # import ipdb; ipdb.set_trace()
-        # num_pads = 9556 + 10
         num_pads = video_input.pixel_values.shape[0] // 4
         padding = "<|video_pad|>" * num_pads
         return f"<|vision_start|>{padding}<|vision_end|>"
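For comparison, the placeholder construction kept by this hunk, written as a small Rust helper: one <|video_pad|> token per four rows of pixel_values, wrapped in the vision markers. The function is hypothetical and only mirrors the Python lines above; it is not part of this diff:

    // Hypothetical helper mirroring the Python snippet above.
    fn qwen2_vl_video_placeholder(pixel_values_rows: usize) -> String {
        // One pad token per four rows of the flattened pixel_values tensor.
        let num_pads = pixel_values_rows / 4;
        let padding = "<|video_pad|>".repeat(num_pads);
        format!("<|vision_start|>{padding}<|vision_end|>")
    }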