From 2ae152a188d610695e5e7574d1e4eb90e82cd4af Mon Sep 17 00:00:00 2001
From: drbh
Date: Thu, 12 Dec 2024 22:00:02 +0000
Subject: [PATCH] fix: update all VLM forward args, pass shared libraries to
 the final Docker layer, and bump docs

---
 Dockerfile_amd                                                | 4 ++++
 Dockerfile_intel                                              | 4 ++++
 .../models/custom_modeling/flash_pali_gemma_modeling.py       | 2 ++
 .../text_generation_server/models/custom_modeling/idefics2.py | 2 ++
 .../models/custom_modeling/llava_next.py                      | 2 ++
 server/text_generation_server/models/mllama_causal_lm.py      | 3 ++-
 server/text_generation_server/models/pali_gemma.py            | 4 +++-
 7 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Dockerfile_amd b/Dockerfile_amd
index 2ae990f7..5b7f6931 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -330,6 +330,10 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 
+# Copy the ffmpeg libraries
+COPY --from=builder /usr/lib/x86_64-linux-gnu/* /usr/lib/x86_64-linux-gnu-copy/
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu-copy"
+
 # AWS Sagemaker compatible image
 FROM base AS sagemaker
 
diff --git a/Dockerfile_intel b/Dockerfile_intel
index 82e53179..4426e8eb 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -232,6 +232,10 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
 # Install launcher
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
 
+# Copy the ffmpeg libraries
+COPY --from=builder /usr/lib/x86_64-linux-gnu/* /usr/lib/x86_64-linux-gnu-copy/
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu-copy"
+
 FROM ${PLATFORM} AS final
 ENV ATTENTION=paged
 ENV PREFIX_CACHING=0
diff --git a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
index b1f89eff..448641e7 100644
--- a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
@@ -81,6 +81,8 @@ class PaliGemmaForConditionalGeneration(nn.Module):
         image_sizes: Optional[torch.Tensor] = None,
         adapter_data: Optional[torch.Tensor] = None,
         image_grid_thw: Optional[torch.LongTensor] = None,
+        video_pixel_values: Optional[torch.FloatTensor] = None,
+        video_grid_thw: Optional[torch.LongTensor] = None,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
         inputs_embeds = self.text_model.embed_tokens(input_ids)
         # TODO This is odd but apparently pali gemma position ids start at 1.
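
Note: every modeling hunk in this patch adds the same two keyword arguments,
including to models with no video path. A minimal sketch of the apparent
motivation, assuming a shared VLM call site; all names below are illustrative
stand-ins, not verbatim TGI code:

    # Sketch: why image-only models gain unused video_* kwargs.
    from typing import Optional
    import torch

    class ImageOnlyModel(torch.nn.Module):
        def forward(
            self,
            input_ids: torch.Tensor,
            pixel_values: Optional[torch.Tensor] = None,
            image_grid_thw: Optional[torch.Tensor] = None,
            # Accepted (and ignored) so one shared call site can pass a
            # uniform set of kwargs to every VLM, video-capable or not.
            video_pixel_values: Optional[torch.Tensor] = None,
            video_grid_thw: Optional[torch.Tensor] = None,
        ) -> torch.Tensor:
            return input_ids  # placeholder body

    # A shared caller can now pass the video kwargs unconditionally. Before
    # this patch, an image-only model would raise:
    #   TypeError: forward() got an unexpected keyword argument 'video_pixel_values'
    model = ImageOnlyModel()
    model.forward(input_ids=torch.zeros(1, dtype=torch.long), video_pixel_values=None)
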
diff --git a/server/text_generation_server/models/custom_modeling/idefics2.py b/server/text_generation_server/models/custom_modeling/idefics2.py
index 923123d6..1e430f56 100644
--- a/server/text_generation_server/models/custom_modeling/idefics2.py
+++ b/server/text_generation_server/models/custom_modeling/idefics2.py
@@ -751,6 +751,8 @@ class Idefics2ForConditionalGeneration(nn.Module):
         image_sizes: Optional[torch.Tensor] = None,
         adapter_data: Optional[torch.Tensor] = None,
         image_grid_thw: Optional[torch.LongTensor] = None,
+        video_pixel_values: Optional[torch.FloatTensor] = None,
+        video_grid_thw: Optional[torch.LongTensor] = None,
     ):
         inputs_embeds = self.text_model.embed_tokens(input_ids)
         if pixel_values is not None:
diff --git a/server/text_generation_server/models/custom_modeling/llava_next.py b/server/text_generation_server/models/custom_modeling/llava_next.py
index df7366ea..e5572bef 100644
--- a/server/text_generation_server/models/custom_modeling/llava_next.py
+++ b/server/text_generation_server/models/custom_modeling/llava_next.py
@@ -181,6 +181,8 @@ class LlavaNextForConditionalGeneration(nn.Module):
         image_sizes: Optional[torch.LongTensor] = None,
         adapter_data: Optional[torch.Tensor] = None,
         image_grid_thw: Optional[torch.LongTensor] = None,
+        video_pixel_values: Optional[torch.FloatTensor] = None,
+        video_grid_thw: Optional[torch.LongTensor] = None,
     ):
         inputs_embeds = self.text_model.embed_tokens(input_ids)
         if pixel_values is not None and len(pixel_values) > 0:
diff --git a/server/text_generation_server/models/mllama_causal_lm.py b/server/text_generation_server/models/mllama_causal_lm.py
index 28e7489e..ce899a48 100644
--- a/server/text_generation_server/models/mllama_causal_lm.py
+++ b/server/text_generation_server/models/mllama_causal_lm.py
@@ -148,7 +148,8 @@ class MllamaCausalLMBatch(VlmCausalLMBatch):
         if image_inputs is not None:
             assert len(image_indices) == image_inputs["pixel_values"].shape[0]
 
-        return batch_tokenized_inputs, image_inputs
+        video_inputs = None
+        return batch_tokenized_inputs, image_inputs, video_inputs
 
     @classmethod
     def from_pb_processor(
diff --git a/server/text_generation_server/models/pali_gemma.py b/server/text_generation_server/models/pali_gemma.py
index fe75570e..5b6960a0 100644
--- a/server/text_generation_server/models/pali_gemma.py
+++ b/server/text_generation_server/models/pali_gemma.py
@@ -68,4 +68,6 @@ class PaliGemmaBatch(VlmCausalLMBatch):
                 image_inputs = new_image_inputs
             else:
                 image_inputs = None
-        return batch_tokenized_inputs, image_inputs
+
+        video_inputs = None
+        return batch_tokenized_inputs, image_inputs, video_inputs
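
Note: with the mllama and pali gemma hunks above, batch_tokenized_inputs
returns a 3-tuple for every VlmCausalLMBatch subclass, even when the model
has no video support. A minimal sketch of that contract; class, method
arguments, and variable names are illustrative assumptions, not verbatim
TGI code:

    # Sketch: the shared batch_tokenized_inputs contract after this patch.
    from typing import Any, List, Optional, Tuple

    class ImageOnlyBatch:
        @classmethod
        def batch_tokenized_inputs(
            cls, requests: List[str]
        ) -> Tuple[Any, Optional[dict], Optional[dict]]:
            batch_tokenized_inputs = [r.upper() for r in requests]  # stand-in tokenization
            image_inputs = None  # populated when the batch carries images
            # No video path for this model, so return None to satisfy the
            # shared 3-tuple contract.
            video_inputs = None
            return batch_tokenized_inputs, image_inputs, video_inputs

    # Callers can now unpack three values unconditionally, with no
    # model-specific special-casing at the unpack site.
    tokens, image_inputs, video_inputs = ImageOnlyBatch.batch_tokenized_inputs(["hi"])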