mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 22:32:07 +00:00
Move video sampling and resizing to validation; downstream code now receives already-sampled, resized frames.
This commit is contained in:
parent
322165d767
commit
e65ead12bb
71
Cargo.lock
generated
71
Cargo.lock
generated
@ -267,7 +267,7 @@ version = "0.23.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ad3a619a9de81e1d7de1f1186dcba4506ed661a0e483d84410fdef0ee87b2f96"
|
checksum = "ad3a619a9de81e1d7de1f1186dcba4506ed661a0e483d84410fdef0ee87b2f96"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bindgen",
|
"bindgen 0.69.5",
|
||||||
"cc",
|
"cc",
|
||||||
"cmake",
|
"cmake",
|
||||||
"dunce",
|
"dunce",
|
||||||
@ -454,6 +454,24 @@ dependencies = [
|
|||||||
"which",
|
"which",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.70.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.6.0",
|
||||||
|
"cexpr",
|
||||||
|
"clang-sys",
|
||||||
|
"itertools 0.13.0",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash",
|
||||||
|
"shlex",
|
||||||
|
"syn 2.0.89",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bit-set"
|
name = "bit-set"
|
||||||
version = "0.5.3"
|
version = "0.5.3"
|
||||||
@ -1237,6 +1255,31 @@ dependencies = [
|
|||||||
"simd-adler32",
|
"simd-adler32",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ffmpeg-next"
|
||||||
|
version = "7.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.6.0",
|
||||||
|
"ffmpeg-sys-next",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ffmpeg-sys-next"
|
||||||
|
version = "7.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2bc3234d0a4b2f7d083699d0860c6c9dd83713908771b60f94a96f8704adfe45"
|
||||||
|
dependencies = [
|
||||||
|
"bindgen 0.70.1",
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"num_cpus",
|
||||||
|
"pkg-config",
|
||||||
|
"vcpkg",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fixedbitset"
|
name = "fixedbitset"
|
||||||
version = "0.4.2"
|
version = "0.4.2"
|
||||||
@ -3014,17 +3057,6 @@ dependencies = [
|
|||||||
"num-traits",
|
"num-traits",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "outlines-core"
|
|
||||||
version = "0.1.0"
|
|
||||||
source = "git+https://github.com/dottxt-ai/outlines-core.git?rev=ba10c619fc9bf3c487e43f49bdecb95a24bb465c#ba10c619fc9bf3c487e43f49bdecb95a24bb465c"
|
|
||||||
dependencies = [
|
|
||||||
"anyhow",
|
|
||||||
"regex",
|
|
||||||
"serde-pyobject",
|
|
||||||
"serde_json",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "overload"
|
name = "overload"
|
||||||
version = "0.1.1"
|
version = "0.1.1"
|
||||||
@ -3972,16 +4004,6 @@ dependencies = [
|
|||||||
"serde_derive",
|
"serde_derive",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "serde-pyobject"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ca4b0aad8b225845739a0030a0d5cc2ae949c56a86a7daf9226c7df7c2016d16"
|
|
||||||
dependencies = [
|
|
||||||
"pyo3",
|
|
||||||
"serde",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_cbor"
|
name = "serde_cbor"
|
||||||
version = "0.11.2"
|
version = "0.11.2"
|
||||||
@ -4009,7 +4031,6 @@ version = "1.0.133"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
|
checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"indexmap 2.6.0",
|
|
||||||
"itoa",
|
"itoa",
|
||||||
"memchr",
|
"memchr",
|
||||||
"ryu",
|
"ryu",
|
||||||
@ -4458,7 +4479,6 @@ dependencies = [
|
|||||||
name = "text-generation-router"
|
name = "text-generation-router"
|
||||||
version = "3.0.2-dev0"
|
version = "3.0.2-dev0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"axum 0.7.9",
|
"axum 0.7.9",
|
||||||
@ -4466,6 +4486,7 @@ dependencies = [
|
|||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"clap 4.5.21",
|
"clap 4.5.21",
|
||||||
"csv",
|
"csv",
|
||||||
|
"ffmpeg-next",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"hf-hub",
|
"hf-hub",
|
||||||
@ -4483,7 +4504,6 @@ dependencies = [
|
|||||||
"once_cell",
|
"once_cell",
|
||||||
"opentelemetry 0.20.0",
|
"opentelemetry 0.20.0",
|
||||||
"opentelemetry-otlp",
|
"opentelemetry-otlp",
|
||||||
"outlines-core",
|
|
||||||
"pyo3",
|
"pyo3",
|
||||||
"rand",
|
"rand",
|
||||||
"regex",
|
"regex",
|
||||||
@ -4491,6 +4511,7 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"sysinfo",
|
"sysinfo",
|
||||||
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokenizers",
|
"tokenizers",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
14
Dockerfile
14
Dockerfile
@ -20,6 +20,20 @@ FROM chef AS builder
|
|||||||
|
|
||||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
python3.11-dev
|
python3.11-dev
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
ffmpeg \
|
||||||
|
libavcodec-dev \
|
||||||
|
libavfilter-dev \
|
||||||
|
libavdevice-dev \
|
||||||
|
libavformat-dev \
|
||||||
|
libavutil-dev \
|
||||||
|
libswscale-dev \
|
||||||
|
pkg-config \
|
||||||
|
libclang-dev \
|
||||||
|
clang \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
||||||
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
|
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
|
||||||
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
|
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
|
||||||
|
@ -440,7 +440,7 @@ impl State {
|
|||||||
mimetype: image.mimetype,
|
mimetype: image.mimetype,
|
||||||
}),
|
}),
|
||||||
Chunk::Video(video) => client::Chunk::Video(client::Video {
|
Chunk::Video(video) => client::Chunk::Video(client::Video {
|
||||||
data: video.data,
|
data: video.frames,
|
||||||
mimetype: video.mimetype,
|
mimetype: video.mimetype,
|
||||||
}),
|
}),
|
||||||
}),
|
}),
|
||||||
|
@ -8,7 +8,8 @@ authors.workspace = true
|
|||||||
homepage.workspace = true
|
homepage.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1"
|
ffmpeg-next = "7.1.0"
|
||||||
|
tempfile = "3.10.1"
|
||||||
async-trait = "0.1.74"
|
async-trait = "0.1.74"
|
||||||
async-stream = "0.3.5"
|
async-stream = "0.3.5"
|
||||||
axum = { version = "0.7", features = ["json"] }
|
axum = { version = "0.7", features = ["json"] }
|
||||||
@ -23,7 +24,6 @@ metrics-exporter-prometheus = { workspace = true }
|
|||||||
nohash-hasher = "0.2.0"
|
nohash-hasher = "0.2.0"
|
||||||
opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
|
opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
|
||||||
opentelemetry-otlp = "0.13.0"
|
opentelemetry-otlp = "0.13.0"
|
||||||
outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
|
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
reqwest = { version = "0.11.20", features = [] }
|
reqwest = { version = "0.11.20", features = [] }
|
||||||
serde = "1.0.188"
|
serde = "1.0.188"
|
||||||
|
@ -2,7 +2,7 @@ import torch
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from opentelemetry import trace
|
from opentelemetry import trace
|
||||||
from typing import Iterable, Optional, Tuple, List, Type, Dict
|
from typing import Iterable, Optional, Tuple, List, Type, Dict
|
||||||
|
|
||||||
@ -252,28 +252,16 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
|
|||||||
video_inputs = None
|
video_inputs = None
|
||||||
if videos:
|
if videos:
|
||||||
try:
|
try:
|
||||||
tensor_videos = []
|
|
||||||
video = videos[0]
|
video = videos[0]
|
||||||
video_buffer = BytesIO(video.data)
|
# Frames are already sampled and resized
|
||||||
video, _audio, info = io.read_video(
|
frames = [
|
||||||
video_buffer,
|
torch.from_numpy(np.frombuffer(frame, dtype=np.uint8).reshape(video.height, video.width, 3))
|
||||||
start_pts=0.0,
|
for frame in video.frames
|
||||||
end_pts=None,
|
]
|
||||||
pts_unit="sec",
|
video_tensor = torch.stack(frames).permute(0, 3, 1, 2) # NHWC -> NCHW
|
||||||
output_format="TCHW",
|
|
||||||
)
|
# Apply any additional preprocessing required by the model
|
||||||
total_frames, video_fps = video.size(0), info["video_fps"]
|
tensor_videos = [video_tensor]
|
||||||
nframes = smart_nframes(
|
|
||||||
fps=30,
|
|
||||||
nframes=None,
|
|
||||||
min_frames=16,
|
|
||||||
max_frames=64,
|
|
||||||
total_frames=total_frames,
|
|
||||||
video_fps=video_fps,
|
|
||||||
)
|
|
||||||
idx = torch.linspace(0, total_frames - 1, nframes).round().long()
|
|
||||||
video = video[idx]
|
|
||||||
tensor_videos.append(video)
|
|
||||||
video_inputs = processor.image_processor(
|
video_inputs = processor.image_processor(
|
||||||
tensor_videos, return_tensors="pt"
|
tensor_videos, return_tensors="pt"
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user