mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 06:12:07 +00:00
working version
This commit is contained in:
parent
af77a0cadf
commit
19e1c8da31
@@ -83,6 +83,7 @@ impl ChunksToString for Vec<InputChunk> {
     data,
     mimetype,
     width,
+    height,
     frames: _,
 })) => {
     // TODO: revisit if we should limit video support to v3 - to avoid sending very large base64 strings
@@ -443,6 +443,7 @@ impl State {
         data: video.data,
         mimetype: video.mimetype,
         width: video.width,
+        height: video.height,
         frames: video.num_frames,
     }),
 }),
@@ -74,8 +74,11 @@ message Video {
   /// Video width
   uint32 width = 3;
 
+  /// Video height
+  uint32 height = 4;
+
   /// Total number of frames
-  uint32 frames = 4;
+  uint32 frames = 5;
 }
 
 message InputChunk {
@@ -244,7 +244,21 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
 )
 num_bytes = len(video_frame_buf)
 bytes_per_frame = num_bytes // chunk.video.frames
-height = bytes_per_frame // 3 // chunk.video.width
+#height = bytes_per_frame // 3 // chunk.video.width
+from loguru import logger
+
+log_master(
+    logger.info,
+    f"Video buffer size: {len(video_frame_buf)}",
+)
+log_master(
+    logger.info,
+    f"Frames in chunk: {chunk.video.frames}",
+)
+log_master(
+    logger.info,
+    f"Bytes per frame: {bytes_per_frame}",
+)
 
 # iterate over with a stride the size of a frame
 frames = []
@@ -252,7 +266,7 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
 frame = video_frame_buf[
     i * bytes_per_frame : (i + 1) * bytes_per_frame
 ]
-frame = frame.reshape(height, chunk.video.width, 3)
+frame = frame.reshape(chunk.video.height, chunk.video.width, 3)
 frames.append(frame)
 
 video_frame_buf = np.stack(frames)
Loading…
Reference in New Issue
Block a user