mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
fix: update all vlm forward args, pass shared libraries to final layer in docker and doc bump
This commit is contained in:
parent
1d6bf243eb
commit
2ae152a188
@ -330,6 +330,10 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
|
||||
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
|
||||
|
||||
# Copy the ffmpeg libraries
|
||||
COPY --from=builder /usr/lib/x86_64-linux-gnu/* /usr/lib/x86_64-linux-gnu-copy/
|
||||
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu-copy"
|
||||
|
||||
# AWS Sagemaker compatible image
|
||||
FROM base AS sagemaker
|
||||
|
||||
|
@ -232,6 +232,10 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
|
||||
# Install launcher
|
||||
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||
|
||||
# Copy the ffmpeg libraries
|
||||
COPY --from=builder /usr/lib/x86_64-linux-gnu/* /usr/lib/x86_64-linux-gnu-copy/
|
||||
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu-copy"
|
||||
|
||||
FROM ${PLATFORM} AS final
|
||||
ENV ATTENTION=paged
|
||||
ENV PREFIX_CACHING=0
|
||||
|
@ -81,6 +81,8 @@ class PaliGemmaForConditionalGeneration(nn.Module):
|
||||
image_sizes: Optional[torch.Tensor] = None,
|
||||
adapter_data: Optional[torch.Tensor] = None,
|
||||
image_grid_thw: Optional[torch.LongTensor] = None,
|
||||
video_pixel_values: Optional[torch.FloatTensor] = None,
|
||||
video_grid_thw: Optional[torch.LongTensor] = None,
|
||||
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
|
||||
inputs_embeds = self.text_model.embed_tokens(input_ids)
|
||||
# TODO This is odd but apparently pali gemma position ids start at 1.
|
||||
|
@ -751,6 +751,8 @@ class Idefics2ForConditionalGeneration(nn.Module):
|
||||
image_sizes: Optional[torch.Tensor] = None,
|
||||
adapter_data: Optional[torch.Tensor] = None,
|
||||
image_grid_thw: Optional[torch.LongTensor] = None,
|
||||
video_pixel_values: Optional[torch.FloatTensor] = None,
|
||||
video_grid_thw: Optional[torch.LongTensor] = None,
|
||||
):
|
||||
inputs_embeds = self.text_model.embed_tokens(input_ids)
|
||||
if pixel_values is not None:
|
||||
|
@ -181,6 +181,8 @@ class LlavaNextForConditionalGeneration(nn.Module):
|
||||
image_sizes: Optional[torch.LongTensor] = None,
|
||||
adapter_data: Optional[torch.Tensor] = None,
|
||||
image_grid_thw: Optional[torch.LongTensor] = None,
|
||||
video_pixel_values: Optional[torch.FloatTensor] = None,
|
||||
video_grid_thw: Optional[torch.LongTensor] = None,
|
||||
):
|
||||
inputs_embeds = self.text_model.embed_tokens(input_ids)
|
||||
if pixel_values is not None and len(pixel_values) > 0:
|
||||
|
@ -148,7 +148,8 @@ class MllamaCausalLMBatch(VlmCausalLMBatch):
|
||||
if image_inputs is not None:
|
||||
assert len(image_indices) == image_inputs["pixel_values"].shape[0]
|
||||
|
||||
return batch_tokenized_inputs, image_inputs
|
||||
video_inputs = None
|
||||
return batch_tokenized_inputs, image_inputs, video_inputs
|
||||
|
||||
@classmethod
|
||||
def from_pb_processor(
|
||||
|
@ -68,4 +68,6 @@ class PaliGemmaBatch(VlmCausalLMBatch):
|
||||
image_inputs = new_image_inputs
|
||||
else:
|
||||
image_inputs = None
|
||||
return batch_tokenized_inputs, image_inputs
|
||||
|
||||
video_inputs = None
|
||||
return batch_tokenized_inputs, image_inputs, video_inputs
|
||||
|
Loading…
Reference in New Issue
Block a user