feat: add tests for image types and remove alpha from png

This commit is contained in:
drbh 2025-03-14 15:33:06 +00:00
parent e5ec176bf4
commit 659ce4f3fc
5 changed files with 156 additions and 1 deletions

View File

@ -0,0 +1,26 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Okay, let's analyze the image.\n\nThe image is a solid, bright white color. There is nothing else visible within it. \n\nIt's essentially a blank white canvas or a completely white square. \n\nIs there anything specific you'd like me to do with this image, such as describe it further or imagine what it might represent?",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1741965894,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion",
"system_fingerprint": "3.2.1-dev0-native",
"usage": {
"completion_tokens": 74,
"prompt_tokens": 277,
"total_tokens": 351
}
}

View File

@ -0,0 +1,26 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Okay, let's analyze the image. \n\nThe image is entirely white, with a very subtle, faint outline of a stylized, cartoonish figure. It appears to be a simplified depiction of a person, likely a child, with a wide-eyed expression and a small, rounded body. \n\nIt's almost like a minimalist, iconic representation. \n\nDo you want me to try and describe it in more detail or perhaps speculate about the context of the image?",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1741965892,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion",
"system_fingerprint": "3.2.1-dev0-native",
"usage": {
"completion_tokens": 98,
"prompt_tokens": 277,
"total_tokens": 375
}
}

View File

@ -0,0 +1,26 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Okay, let's analyze the image. \n\nThe transparent image reveals a stylized depiction of **a human head**. It's a minimalist, geometric representation, showing the basic shapes of the skull, eye sockets, and head outline. \n\nDo you want me to describe any specific element of the image in more detail?",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1741966313,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion",
"system_fingerprint": "3.2.1-dev0-native",
"usage": {
"completion_tokens": 67,
"prompt_tokens": 277,
"total_tokens": 344
}
}

View File

@ -1,3 +1,7 @@
import base64
from io import BytesIO
from PIL import Image
import pytest import pytest
@ -91,3 +95,76 @@ async def test_exceed_window(flash_gemma3, response_snapshot):
) )
assert response.details.generated_tokens == 16 assert response.details.generated_tokens == 16
assert response == response_snapshot assert response == response_snapshot
def image_to_data_url(img: Image.Image, fmt: str) -> str:
    """Encode a Pillow image as a base64 ``data:`` URL.

    Args:
        img: Image to serialize; it is written with ``img.save``.
        fmt: Format name handed to ``img.save`` (e.g. ``"PNG"``, ``"JPEG"``).
            Matching is case-insensitive for the MIME type.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.
    """
    buffer = BytesIO()
    img.save(buffer, format=fmt)
    b64_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    # Derive the MIME subtype from the format name rather than labelling every
    # non-PNG payload as JPEG; this is correct for the common raster formats
    # (PNG, JPEG, GIF, WEBP, BMP, TIFF) and unchanged for PNG/JPEG.
    mime_type = f"image/{fmt.lower()}"
    return f"data:{mime_type};base64,{b64_str}"
async def test_flash_gemma3_image_base64_rgba(flash_gemma3, response_snapshot):
    """Chat with an RGBA PNG sent as a base64 data URL and compare to the snapshot."""
    # 100x100 canvas where every pixel is (0, 0, 0, 0), i.e. alpha set to zero.
    transparent = Image.new("RGBA", (100, 100), (0, 0, 0, 0))
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_to_data_url(transparent, "PNG")},
    }
    text_part = {
        "type": "text",
        "text": "What do you see in this transparent image?",
    }
    user_message = {"role": "user", "content": [image_part, text_part]}

    response = await flash_gemma3.chat(
        seed=42,
        messages=[user_message],
        max_tokens=100,
    )

    assert response == response_snapshot
async def test_flash_gemma3_image_base64_rgb_png(flash_gemma3, response_snapshot):
    """Chat with an RGB (no alpha) PNG sent as a base64 data URL and compare to the snapshot."""
    # 100x100 canvas filled with (255, 255, 255); mode "RGB" carries no alpha band.
    white = Image.new("RGB", (100, 100), (255, 255, 255))
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_to_data_url(white, "PNG")},
    }
    text_part = {"type": "text", "text": "What do you see in this plain image?"}
    user_message = {"role": "user", "content": [image_part, text_part]}

    response = await flash_gemma3.chat(
        seed=42,
        messages=[user_message],
        max_tokens=100,
    )

    assert response == response_snapshot
async def test_flash_gemma3_image_base64_rgb_jpg(flash_gemma3, response_snapshot):
    """Chat with an RGB JPEG sent as a base64 data URL and compare to the snapshot."""
    # 100x100 canvas filled with (255, 255, 255), serialized with the JPEG encoder.
    white = Image.new("RGB", (100, 100), (255, 255, 255))
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_to_data_url(white, "JPEG")},
    }
    text_part = {"type": "text", "text": "What do you see in this JPEG image?"}
    user_message = {"role": "user", "content": [image_part, text_part]}

    response = await flash_gemma3.chat(
        seed=42,
        messages=[user_message],
        max_tokens=100,
    )

    assert response == response_snapshot

View File

@ -263,7 +263,7 @@ class Gemma3ImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
do_convert_rgb: bool = None, do_convert_rgb: bool = True,
do_pan_and_scan: bool = None, do_pan_and_scan: bool = None,
pan_and_scan_min_crop_size: int = None, pan_and_scan_min_crop_size: int = None,
pan_and_scan_max_num_crops: int = None, pan_and_scan_max_num_crops: int = None,