working version

This commit is contained in:
Miquel Farre 2024-12-11 21:08:03 +00:00 committed by drbh
parent af77a0cadf
commit 19e1c8da31
4 changed files with 22 additions and 3 deletions

View File

@ -83,6 +83,7 @@ impl ChunksToString for Vec<InputChunk> {
data,
mimetype,
width,
height,
frames: _,
})) => {
// TODO: revisit if we should limit video support to v3 - to avoid sending very large base64 strings

View File

@ -443,6 +443,7 @@ impl State {
data: video.data,
mimetype: video.mimetype,
width: video.width,
height: video.height,
frames: video.num_frames,
}),
}),

View File

@ -74,8 +74,11 @@ message Video {
/// Video width
uint32 width = 3;
/// Video height
uint32 height = 4;
/// Total number of frames
uint32 frames = 4;
uint32 frames = 5;
}
message InputChunk {

View File

@ -244,15 +244,29 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
)
num_bytes = len(video_frame_buf)
bytes_per_frame = num_bytes // chunk.video.frames
height = bytes_per_frame // 3 // chunk.video.width
#height = bytes_per_frame // 3 // chunk.video.width
from loguru import logger
log_master(
logger.info,
f"Video buffer size: {len(video_frame_buf)}",
)
log_master(
logger.info,
f"Frames in chunk: {chunk.video.frames}",
)
log_master(
logger.info,
f"Bytes per frame: {bytes_per_frame}",
)
# iterate over with a stride the size of a frame
frames = []
for i in range(chunk.video.frames):
frame = video_frame_buf[
i * bytes_per_frame : (i + 1) * bytes_per_frame
]
frame = frame.reshape(height, chunk.video.width, 3)
frame = frame.reshape(chunk.video.height, chunk.video.width, 3)
frames.append(frame)
video_frame_buf = np.stack(frames)