mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00

feat: add tests for image types and remove alpha from png

parent: e5ec176bf4
commit: 659ce4f3fc
@@ -0,0 +1,26 @@
{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "Okay, let's analyze the image.\n\nThe image is a solid, bright white color. There is nothing else visible within it. \n\nIt's essentially a blank white canvas or a completely white square. \n\nIs there anything specific you'd like me to do with this image, such as describe it further or imagine what it might represent?",
        "name": null,
        "role": "assistant",
        "tool_calls": null
      },
      "usage": null
    }
  ],
  "created": 1741965894,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
  "system_fingerprint": "3.2.1-dev0-native",
  "usage": {
    "completion_tokens": 74,
    "prompt_tokens": 277,
    "total_tokens": 351
  }
}
@@ -0,0 +1,26 @@
{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "Okay, let's analyze the image. \n\nThe image is entirely white, with a very subtle, faint outline of a stylized, cartoonish figure. It appears to be a simplified depiction of a person, likely a child, with a wide-eyed expression and a small, rounded body. \n\nIt's almost like a minimalist, iconic representation. \n\nDo you want me to try and describe it in more detail or perhaps speculate about the context of the image?",
        "name": null,
        "role": "assistant",
        "tool_calls": null
      },
      "usage": null
    }
  ],
  "created": 1741965892,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
  "system_fingerprint": "3.2.1-dev0-native",
  "usage": {
    "completion_tokens": 98,
    "prompt_tokens": 277,
    "total_tokens": 375
  }
}
@@ -0,0 +1,26 @@
{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "Okay, let's analyze the image. \n\nThe transparent image reveals a stylized depiction of **a human head**. It's a minimalist, geometric representation, showing the basic shapes of the skull, eye sockets, and head outline. \n\nDo you want me to describe any specific element of the image in more detail?",
        "name": null,
        "role": "assistant",
        "tool_calls": null
      },
      "usage": null
    }
  ],
  "created": 1741966313,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
  "system_fingerprint": "3.2.1-dev0-native",
  "usage": {
    "completion_tokens": 67,
    "prompt_tokens": 277,
    "total_tokens": 344
  }
}
@@ -1,3 +1,7 @@
import base64
from io import BytesIO
from PIL import Image

import pytest

@@ -91,3 +95,76 @@ async def test_exceed_window(flash_gemma3, response_snapshot):
    )
    assert response.details.generated_tokens == 16
    assert response == response_snapshot


# Helper function to convert a Pillow image to a base64 data URL
def image_to_data_url(img: Image.Image, fmt: str) -> str:
    buffer = BytesIO()
    img.save(buffer, format=fmt)
    img_data = buffer.getvalue()
    b64_str = base64.b64encode(img_data).decode("utf-8")
    mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
    return f"data:{mime_type};base64,{b64_str}"


async def test_flash_gemma3_image_base64_rgba(flash_gemma3, response_snapshot):
    # Create an empty 100x100 PNG image with alpha (transparent background)
    img = Image.new("RGBA", (100, 100), (0, 0, 0, 0))
    data_url = image_to_data_url(img, "PNG")
    response = await flash_gemma3.chat(
        seed=42,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": data_url}},
                    {
                        "type": "text",
                        "text": "What do you see in this transparent image?",
                    },
                ],
            },
        ],
        max_tokens=100,
    )
    assert response == response_snapshot


async def test_flash_gemma3_image_base64_rgb_png(flash_gemma3, response_snapshot):
    # Create an empty 100x100 PNG image without alpha (white background)
    img = Image.new("RGB", (100, 100), (255, 255, 255))
    data_url = image_to_data_url(img, "PNG")
    response = await flash_gemma3.chat(
        seed=42,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": data_url}},
                    {"type": "text", "text": "What do you see in this plain image?"},
                ],
            },
        ],
        max_tokens=100,
    )
    assert response == response_snapshot


async def test_flash_gemma3_image_base64_rgb_jpg(flash_gemma3, response_snapshot):
    # Create an empty 100x100 JPEG image (white background)
    img = Image.new("RGB", (100, 100), (255, 255, 255))
    data_url = image_to_data_url(img, "JPEG")
    response = await flash_gemma3.chat(
        seed=42,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": data_url}},
                    {"type": "text", "text": "What do you see in this JPEG image?"},
                ],
            },
        ],
        max_tokens=100,
    )
    assert response == response_snapshot
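For reference, image_to_data_url above simply emits an OpenAI-style data URL. A minimal standalone sketch follows; the helper is duplicated here only so the snippet runs outside the test file, and the printed prefix is just the expected shape of the payload the new tests pass to flash_gemma3.chat:

import base64
from io import BytesIO

from PIL import Image


def image_to_data_url(img: Image.Image, fmt: str) -> str:
    # Same logic as the test helper above, repeated so this runs standalone.
    buffer = BytesIO()
    img.save(buffer, format=fmt)
    b64_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
    return f"data:{mime_type};base64,{b64_str}"


data_url = image_to_data_url(Image.new("RGBA", (100, 100), (0, 0, 0, 0)), "PNG")
# The content list mirrors what the new tests send in the user message.
message = {
    "role": "user",
    "content": [
        {"type": "image_url", "image_url": {"url": data_url}},
        {"type": "text", "text": "What do you see in this transparent image?"},
    ],
}
print(data_url[:30])  # data:image/png;base64,iVBORw0K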
@@ -263,7 +263,7 @@ class Gemma3ImageProcessor(BaseImageProcessor):
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
-       do_convert_rgb: bool = None,
+       do_convert_rgb: bool = True,
        do_pan_and_scan: bool = None,
        pan_and_scan_min_crop_size: int = None,
        pan_and_scan_max_num_crops: int = None,
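The only processor change is flipping the do_convert_rgb default from None to True, so transparent PNG inputs are collapsed to RGB before preprocessing. The sketch below is a rough stand-in for that step, using a plain Pillow convert(); the real Gemma3ImageProcessor conversion and any background compositing details may differ:

from PIL import Image


def maybe_convert_rgb(image: Image.Image, do_convert_rgb: bool = True) -> Image.Image:
    # Hypothetical stand-in for the processor's RGB-conversion step:
    # with do_convert_rgb=True an RGBA image loses its alpha channel
    # before resizing/normalization; with None/False it would pass through.
    if do_convert_rgb and image.mode != "RGB":
        return image.convert("RGB")
    return image


rgba = Image.new("RGBA", (100, 100), (0, 0, 0, 0))  # same image as the RGBA test above
print(maybe_convert_rgb(rgba).mode)  # -> RGB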