diff --git a/.github/workflows/autodocs.yaml b/.github/workflows/autodocs.yaml
index a768f263..cbcc29f5 100644
--- a/.github/workflows/autodocs.yaml
+++ b/.github/workflows/autodocs.yaml
@@ -20,7 +20,7 @@ jobs:
       - name: Install Protocol Buffers compiler
         run: |
           sudo apt-get update
-          sudo apt-get install -y protobuf-compiler libprotobuf-dev
+          sudo apt-get install -y protobuf-compiler libprotobuf-dev clang libavcodec-dev libavfilter-dev libavdevice-dev libavformat-dev libavutil-dev pkg-config

       - name: Install Launcher
         id: install-launcher
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 4eeca334..b54bb749 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -43,7 +43,9 @@ jobs:
       - name: Install
         run: |
           sudo apt update
-          sudo apt install python3.11-dev -y
+          sudo apt install python3.11-dev python3.11-venv python3-pip clang libavcodec-dev libavfilter-dev libavdevice-dev libavformat-dev libavutil-dev pkg-config -y
+          export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/lib/x86_64-linux-gnu/pkgconfig
+          python -m pip install --upgrade pip
           make install-cpu
       - name: Run server tests
         run: |
diff --git a/Cargo.lock b/Cargo.lock
index 74ae6e16..5c64b43e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -267,7 +267,7 @@ version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ad3a619a9de81e1d7de1f1186dcba4506ed661a0e483d84410fdef0ee87b2f96"
 dependencies = [
- "bindgen",
+ "bindgen 0.69.5",
  "cc",
  "cmake",
  "dunce",
@@ -454,6 +454,24 @@ dependencies = [
  "which",
 ]

+[[package]]
+name = "bindgen"
+version = "0.70.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
+dependencies = [
+ "bitflags 2.6.0",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.13.0",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "syn 2.0.89",
+]
+
 [[package]]
 name = "bit-set"
 version = "0.5.3"
@@ -487,6 +505,15 @@ version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"

+[[package]]
+name = "bitreader"
+version = "0.3.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "886559b1e163d56c765bc3a985febb4eee8009f625244511d8ee3c432e08c066"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "bitstream-io"
 version = "2.6.0"
@@ -1194,6 +1221,15 @@ dependencies = [
  "zune-inflate",
 ]

+[[package]]
+name = "fallible_collections"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a88c69768c0a15262df21899142bc6df9b9b823546d4b4b9a7bc2d6c448ec6fd"
+dependencies = [
+ "hashbrown 0.13.2",
+]
+
 [[package]]
 name = "fancy-regex"
 version = "0.11.0"
@@ -1219,6 +1255,31 @@ dependencies = [
  "simd-adler32",
 ]

+[[package]]
+name = "ffmpeg-next"
+version = "7.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9"
+dependencies = [
+ "bitflags 2.6.0",
+ "ffmpeg-sys-next",
+ "libc",
+]
+
+[[package]]
+name = "ffmpeg-sys-next"
+version = "7.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bc3234d0a4b2f7d083699d0860c6c9dd83713908771b60f94a96f8704adfe45"
+dependencies = [
+ "bindgen 0.70.1",
+ "cc",
+ "libc",
+ "num_cpus",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "fixedbitset"
 version = "0.4.2"
@@ -1512,6 +1573,15 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"

+[[package]]
+name = "hashbrown"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
+dependencies = [
+ "ahash",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
@@ -2471,6 +2541,20 @@ dependencies = [
  "syn 2.0.89",
 ]

+[[package]]
+name = "mp4parse"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63a35203d3c6ce92d5251c77520acb2e57108c88728695aa883f70023624c570"
+dependencies = [
+ "bitreader",
+ "byteorder",
+ "fallible_collections",
+ "log",
+ "num-traits",
+ "static_assertions",
+]
+
 [[package]]
 name = "multimap"
 version = "0.10.0"
@@ -4425,6 +4509,7 @@ dependencies = [
  "base64 0.22.1",
  "clap 4.5.21",
  "csv",
+ "ffmpeg-next",
  "futures",
  "futures-util",
  "hf-hub",
@@ -4436,6 +4521,7 @@ dependencies = [
  "metrics-exporter-prometheus",
  "minijinja",
  "minijinja-contrib",
+ "mp4parse",
  "ngrok",
  "nohash-hasher",
  "once_cell",
@@ -4449,6 +4535,7 @@ dependencies = [
  "serde",
  "serde_json",
  "sysinfo",
+ "tempfile",
  "thiserror",
  "tokenizers",
  "tokio",
\ - "linux/arm64") exit 1 ;; \ - *) /opt/conda/bin/conda update -y conda && \ - /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ + "linux/arm64") exit 1 ;; \ + *) /opt/conda/bin/conda update -y conda && \ + /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" "openssl>=3.3.0" ;; \ esac && \ /opt/conda/bin/conda clean -ya +RUN /opt/conda/bin/conda install -y pyOpenSSL + + # CUDA kernels builder image FROM pytorch-install AS kernel-builder @@ -95,8 +112,8 @@ ARG MAX_JOBS=8 ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0+PTX" RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - ninja-build cmake \ - && rm -rf /var/lib/apt/lists/* + ninja-build cmake \ + && rm -rf /var/lib/apt/lists/* # Build Flash Attention CUDA kernels FROM kernel-builder AS flash-att-builder @@ -188,12 +205,15 @@ ENV HF_HOME=/data \ WORKDIR /usr/src RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - libssl-dev \ - ca-certificates \ - make \ - curl \ - git \ - && rm -rf /var/lib/apt/lists/* + libssl-dev \ + ca-certificates \ + make \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Add ffmpeg libraries to the path +ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH" # Copy conda with PyTorch installed COPY --from=pytorch-install /opt/conda /opt/conda @@ -239,6 +259,8 @@ RUN cd server && \ ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2 # Required to find libpython within the rust binaries ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/" +ENV LD_PRELOAD="/opt/conda/lib/libcrypto.so.3" + # This is needed because exl2 tries to load flash-attn # And fails with our builds. ENV EXLLAMA_NO_FLASH_ATTN=1 @@ -247,9 +269,9 @@ ENV EXLLAMA_NO_FLASH_ATTN=1 # The binaries change on every build given we burn the SHA into them # The deps change less often. 
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - build-essential \ - g++ \ - && rm -rf /var/lib/apt/lists/* + build-essential \ + g++ \ + && rm -rf /var/lib/apt/lists/* # Install benchmarker COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark @@ -258,6 +280,9 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca # Install launcher COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher +# Copy the ffmpeg libraries +COPY --from=builder /usr/lib/x86_64-linux-gnu/* /usr/lib/x86_64-linux-gnu-copy/ +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu-copy" # AWS Sagemaker compatible image FROM base AS sagemaker diff --git a/backends/client/src/lib.rs b/backends/client/src/lib.rs index 45bee10c..cec820f6 100644 --- a/backends/client/src/lib.rs +++ b/backends/client/src/lib.rs @@ -9,7 +9,7 @@ use thiserror::Error; use tonic::transport; use tonic::Status; -pub use v3::{Chunk, Image, Input, InputChunk}; +pub use v3::{Chunk, Image, Input, InputChunk, Video}; #[async_trait] pub trait Health { @@ -79,6 +79,20 @@ impl ChunksToString for Vec { let encoded = STANDARD.encode(data); output.push_str(&format!("![](data:{};base64,{})", mimetype, encoded)) } + Some(Chunk::Video(Video { + data, + mimetype, + width, + height: _, + frames: _, + })) => { + // TODO: revisit if we should limit video support to v3 - to avoid sending very large base64 strings + let encoded = STANDARD.encode(data); + output.push_str(&format!( + r#""#, + width, mimetype, encoded, mimetype + )); + } // We don't create empty chunks, so this should be unreachable. None => unreachable!("Chunks should never be empty"), }); diff --git a/backends/client/src/v3/mod.rs b/backends/client/src/v3/mod.rs index 4a1296a2..0ebd2799 100644 --- a/backends/client/src/v3/mod.rs +++ b/backends/client/src/v3/mod.rs @@ -8,6 +8,6 @@ pub use client::Client; pub use pb::generate::v3::{ input_chunk::Chunk, Batch, CachedBatch, FinishReason, GeneratedText, Generation, GrammarType, HealthResponse, Image, InfoResponse, Input, InputChunk, NextTokenChooserParameters, Request, - StoppingCriteriaParameters, Tokens, + StoppingCriteriaParameters, Tokens, Video, }; pub use sharded_client::ShardedClient; diff --git a/backends/trtllm/src/looper.rs b/backends/trtllm/src/looper.rs index 969046d1..e745e2ee 100644 --- a/backends/trtllm/src/looper.rs +++ b/backends/trtllm/src/looper.rs @@ -301,6 +301,7 @@ impl TensorRtLlmBackendV2 { 1 => match request.inputs.first().expect("Single item-chunk") { Chunk::Text(_) => Ok(()), Chunk::Image(_) => Err(ValidationError(UnsupportedModality("image"))), + Chunk::Video(_) => Err(ValidationError(UnsupportedModality("video"))), }, } } diff --git a/backends/v3/src/client/mod.rs b/backends/v3/src/client/mod.rs index d4ac50c9..3d87c319 100644 --- a/backends/v3/src/client/mod.rs +++ b/backends/v3/src/client/mod.rs @@ -15,7 +15,7 @@ pub use grpc_client::Client; pub use pb::generate::v3::{ input_chunk::Chunk, Batch, CachedBatch, FinishReason, GeneratedText, Generation, GrammarType, HealthResponse, Image, InfoResponse, Input, InputChunk, NextTokenChooserParameters, Request, - StoppingCriteriaParameters, + StoppingCriteriaParameters, Video, }; pub use sharded_client::ShardedClient; diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs index dd27806f..a5dd6cb5 100644 --- a/backends/v3/src/queue.rs +++ b/backends/v3/src/queue.rs @@ 
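Note on the `ChunksToString` change above: on the legacy (non-v3) client path the decoded video bytes are inlined into the prompt string as a base64 `data:` URL, which is what the TODO about "very large base64 strings" refers to. The sketch below (Python, illustration only) shows the rough shape of such a string; the exact markup and attribute set are an assumption here, not taken verbatim from the router, but the base64 step always inflates the payload by roughly a third regardless of markup.

    import base64

    def video_chunk_to_prompt_string(data: bytes, mimetype: str, width: int) -> str:
        # Hypothetical rendering of a video chunk as an inline data: URL.
        # base64 output is ~4/3 the raw size, so even short clips produce
        # multi-megabyte prompt strings (hence the TODO above).
        encoded = base64.b64encode(data).decode("ascii")
        return (
            f'<video width="{width}">'
            f'<source src="data:{mimetype};base64,{encoded}" type="{mimetype}">'
            f"</video>"
        )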
diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs
index dd27806f..a5dd6cb5 100644
--- a/backends/v3/src/queue.rs
+++ b/backends/v3/src/queue.rs
@@ -439,6 +439,13 @@ impl State {
                             data: image.data,
                             mimetype: image.mimetype,
                         }),
+                        Chunk::Video(video) => client::Chunk::Video(client::Video {
+                            data: video.data,
+                            mimetype: video.mimetype,
+                            width: video.width,
+                            height: video.height,
+                            frames: video.num_frames,
+                        }),
                     }),
                 })
                 .collect(),
diff --git a/docs/openapi.json b/docs/openapi.json
index 48120f77..320e3237 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1922,6 +1922,24 @@
             ]
           }
         }
+      },
+      {
+        "type": "object",
+        "required": [
+          "video_url",
+          "type"
+        ],
+        "properties": {
+          "type": {
+            "type": "string",
+            "enum": [
+              "video_url"
+            ]
+          },
+          "video_url": {
+            "$ref": "#/components/schemas/Url"
+          }
+        }
+      }
     ],
     "discriminator": {
diff --git a/flake.nix b/flake.nix
index 83cedfa6..f522bb88 100644
--- a/flake.nix
+++ b/flake.nix
@@ -115,15 +115,17 @@
           buildInputs =
             [
               benchmark
-              launcher
-              router
-              server
+              cargo
               client
+              clippy
+              ffmpeg
+              launcher
               openssl.dev
               pkg-config
-              cargo
+              router
+              rustPlatform.bindgenHook
               rustfmt
-              clippy
+              server
             ]
             ++ (with python3.pkgs; [
               docker
diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2_vl_video/test_qwen2_vl_simpl.json b/integration-tests/models/__snapshots__/test_flash_qwen2_vl_video/test_qwen2_vl_simpl.json
new file mode 100644
index 00000000..612edb07
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2_vl_video/test_qwen2_vl_simpl.json
@@ -0,0 +1,19 @@
+{
+  "choices": [
+    {
+      "delta": {
+        "content": "",
+        "role": "assistant"
+      },
+      "finish_reason": "stop",
+      "index": 0,
+      "logprobs": null
+    }
+  ],
+  "created": 1733450914,
+  "id": "",
+  "model": "Qwen/Qwen2-VL-7B-Instruct",
+  "object": "chat.completion.chunk",
+  "system_fingerprint": "2.4.2-dev0-native",
+  "usage": null
+}
diff --git a/integration-tests/models/test_flash_qwen2_vl_video.py b/integration-tests/models/test_flash_qwen2_vl_video.py
new file mode 100644
index 00000000..79eea3c7
--- /dev/null
+++ b/integration-tests/models/test_flash_qwen2_vl_video.py
@@ -0,0 +1,75 @@
+import pytest
+import json
+import requests
+
+
+@pytest.fixture(scope="module")
+def qwen2_vl_handle(launcher):
+    with launcher(
+        "Qwen/Qwen2-VL-7B-Instruct",
+        max_input_length=10_000,
+        max_batch_prefill_tokens=10_000,
+        max_total_tokens=10_001,
+        cuda_graphs=[0],
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def qwen2_vl(qwen2_vl_handle):
+    await qwen2_vl_handle.health(300)
+    return qwen2_vl_handle.client
+
+
+@pytest.mark.asyncio
+async def test_qwen2_vl_simpl(qwen2_vl, response_snapshot):
+    responses = requests.post(
+        f"{qwen2_vl.base_url}/v1/chat/completions",
+        headers=qwen2_vl.headers,
+        json={
+            "model": "tgi",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "video_url",
+                            "video_url": {
+                                "url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/360/Big_Buck_Bunny_360_10s_1MB.mp4"
+                            },
+                        },
+                        {
+                            "type": "text",
+                            "text": "Describe this video.",
+                        },
+                    ],
+                },
+            ],
+            "seed": 42,
+            "max_tokens": 100,
+            "stream": True,
+        },
+    )
+
+    # iterate over the response in chunks
+    count = 0
+    full_text = ""
+    last_response = None
+    for chunk in responses.iter_content(chunk_size=1024):
+        if chunk:
+            count += 1
+            # remove the "data: " prefix, trailing newline, and split the chunk into individual lines
+            lines = chunk.decode("utf-8").replace("data: ", "").rstrip("\n").split("\n")
+            for line in lines:
+                if line == "[DONE]":
+                    break
+                print("=", line)
+                try:
+                    response = json.loads(line)
+                    # print(response)
+                    last_response = response
+                    full_text += response["choices"][0]["delta"]["content"]
+                except json.JSONDecodeError:
+                    pass
+
+    assert last_response == response_snapshot
diff --git a/nix/crate-overrides.nix b/nix/crate-overrides.nix
index a4e74c6d..36fee1d1 100644
--- a/nix/crate-overrides.nix
+++ b/nix/crate-overrides.nix
@@ -19,6 +19,26 @@ defaultCrateOverrides
     };
   rav1e = attrs: { env.CARGO_ENCODED_RUSTFLAGS = "-C target-feature=-crt-static"; };

+  ffmpeg-sys-next = attrs: {
+    nativeBuildInputs = [
+      pkg-config
+    ];
+    buildInputs = [
+      rustPlatform.bindgenHook
+      ffmpeg
+    ];
+  };
+
+  ffmpeg-next = attrs: {
+    # Somehow the variables that are passed are mangled, so they are not
+    # correctly passed to the ffmpeg-next build script. Worth investigating
+    # more since it's probably a bug in crate2nix or buildRustCrate.
+    postPatch = ''
+      substituteInPlace build.rs \
+        --replace-fail "DEP_FFMPEG_" "DEP_FFMPEG_SYS_NEXT_"
+    '';
+  };
+
   grpc-metadata = attrs: {
     src = filter {
       root = ../backends/grpc-metadata;
diff --git a/nix/impure-shell.nix b/nix/impure-shell.nix
index a13fd711..9d61ae9a 100644
--- a/nix/impure-shell.nix
+++ b/nix/impure-shell.nix
@@ -5,9 +5,11 @@
   cmake,
   isort,
   ninja,
+  rustPlatform,
   which,
   cudaPackages,
   openssl,
+  ffmpeg,
   pkg-config,
   poetry,
   protobuf,
@@ -26,6 +28,7 @@
 mkShell {
   nativeBuildInputs =
     [
+      rustPlatform.bindgenHook
      black
      isort
      pkg-config
@@ -53,6 +56,7 @@ mkShell {
   buildInputs =
     [
       openssl.dev
+      ffmpeg
     ]
     ++ (with python3.pkgs; [
      venvShellHook
diff --git a/proto/v3/generate.proto b/proto/v3/generate.proto
index 02980b6f..0d707ee9 100644
--- a/proto/v3/generate.proto
+++ b/proto/v3/generate.proto
@@ -64,12 +64,31 @@ message Image {
   string mimetype = 2;
 }

+message Video {
+  /// Binary video data (array of RGB data)
+  bytes data = 1;
+
+  /// Video MIME type.
+  string mimetype = 2;
+
+  /// Video width
+  uint32 width = 3;
+
+  /// Video height
+  uint32 height = 4;
+
+  /// Total number of frames
+  uint32 frames = 5;
+}
+
 message InputChunk {
   oneof chunk {
     /// Plain text data
     string text = 1;
     /// Image data
     Image image = 2;
+    /// Video URLs
+    Video video = 3;
   }
 }
diff --git a/router/Cargo.toml b/router/Cargo.toml
index 9258fe03..f35428e9 100644
--- a/router/Cargo.toml
+++ b/router/Cargo.toml
@@ -14,20 +14,23 @@ async-stream = "0.3.5"
 axum = { version = "0.7", features = ["json"] }
 axum-tracing-opentelemetry = "0.16"
 clap = { version = "4.4.5", features = ["derive", "env"] }
+ffmpeg-next = { version = "7.1.0", optional = true }
 futures = "0.3.28"
 hf-hub = { workspace = true }
 itertools = "0.10"
 jsonschema = { version = "0.17.1", features = ["draft202012"] }
 metrics = { workspace = true }
 metrics-exporter-prometheus = { workspace = true }
+mp4parse = { version = "0.17.0", optional = true }
 nohash-hasher = "0.2.0"
 opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
-opentelemetry-otlp = "0.13.0"
 outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
+opentelemetry-otlp = "0.13.0"
 rand = "0.8.5"
 reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"
 serde_json = "1.0.107"
+tempfile = { version = "3.10.1", optional = true }
 thiserror = "1.0.48"
 tokenizers = { workspace = true }
 tokio = { version = "1.32.0", features = [
@@ -74,3 +77,4 @@ default = ["ngrok"]
 ngrok = ["dep:ngrok"]
 google = []
 kserve = []
+video = ["ffmpeg-next", "mp4parse", "tempfile"]
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 84e9bc48..e405129b 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -1173,6 +1173,7 @@ pub struct Url {
 pub enum MessageChunk {
     Text { text: String },
     ImageUrl { image_url: Url },
+    VideoUrl { video_url: Url },
 }

 #[derive(Clone, Deserialize, ToSchema, Serialize, Debug, PartialEq)]
@@ -1229,6 +1230,9 @@ impl From<Message> for TextMessage {
             .map(|chunk| match chunk {
                 MessageChunk::Text { text } => text,
                 MessageChunk::ImageUrl { image_url } => format!("![]({})", image_url.url),
+                MessageChunk::VideoUrl { video_url } => {
+                    format!("
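For reference, the new `video_url` content type mirrors the existing `image_url` shape on the Messages API. The snippet below is a minimal non-streaming sketch distilled from `test_flash_qwen2_vl_video.py` and the OpenAPI addition above; the host and port are assumptions, and only the payload shape (`type` plus `video_url.url`) is taken from this diff.

    import requests

    # Minimal chat-completions call with a video_url chunk, mirroring the
    # integration test above (same public test clip). Endpoint host/port are
    # placeholders for a locally running text-generation-inference server.
    resp = requests.post(
        "http://localhost:3000/v1/chat/completions",
        json={
            "model": "tgi",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "video_url",
                            "video_url": {
                                "url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/360/Big_Buck_Bunny_360_10s_1MB.mp4"
                            },
                        },
                        {"type": "text", "text": "Describe this video."},
                    ],
                }
            ],
            "max_tokens": 100,
        },
    )
    print(resp.json()["choices"][0]["message"]["content"])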