mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
feat: consolidate changes with existing vlms and add support and test for smolvlm
This commit is contained in:
parent
064e040ee3
commit
0d1bf9e983
@ -0,0 +1,61 @@
|
||||
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "eos_token",
    "generated_tokens": 8,
    "prefill": [],
    "seed": null,
    "tokens": [
      {
        "id": 330,
        "logprob": -0.118652344,
        "special": false,
        "text": " A"
      },
      {
        "id": 11426,
        "logprob": -0.28320312,
        "special": false,
        "text": " bee"
      },
      {
        "id": 335,
        "logprob": -0.95703125,
        "special": false,
        "text": " on"
      },
      {
        "id": 253,
        "logprob": -0.06982422,
        "special": false,
        "text": " a"
      },
      {
        "id": 11986,
        "logprob": -0.49414062,
        "special": false,
        "text": " pink"
      },
      {
        "id": 8525,
        "logprob": -0.07763672,
        "special": false,
        "text": " flower"
      },
      {
        "id": 30,
        "logprob": -1.0703125,
        "special": false,
        "text": "."
      },
      {
        "id": 49154,
        "logprob": -0.092285156,
        "special": true,
        "text": "<end_of_utterance>"
      }
    ],
    "top_tokens": null
  },
  "generated_text": " A bee on a pink flower."
}
|
31
integration-tests/models/test_smolvlm.py
Normal file
31
integration-tests/models/test_smolvlm.py
Normal file
@ -0,0 +1,31 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def flash_smolvlm_next_handle(launcher):
    """Launch a text-generation server for SmolVLM-Instruct, shared across the module.

    The server is torn down automatically when the module's tests finish,
    since the launcher context manager closes on generator exit.
    """
    server = launcher("HuggingFaceTB/SmolVLM-Instruct")
    with server as handle:
        yield handle
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
async def flash_smolvlm_next(flash_smolvlm_next_handle):
    """Wait until the SmolVLM server reports healthy, then expose its client.

    Health polling is capped at 300 seconds to bound startup time.
    """
    handle = flash_smolvlm_next_handle
    await handle.health(300)
    return handle.client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_smolvlm_next_simple_url(flash_smolvlm_next, response_snapshot):
    """Caption a remote image and pin the exact generation against the snapshot.

    The image URL is embedded inline in the prompt using markdown image syntax
    (``![](url)``), which the server fetches and feeds to the vision tower.
    """
    # NOTE(review): renamed from `ny_skyline` — the URL points at a bee image;
    # the old name was copied from another VLM test and was misleading.
    bee_image = "https://huggingface.co/spaces/merve/chameleon-7b/resolve/main/bee.jpg"
    query = "What is in this image?"
    response = await flash_smolvlm_next.generate(
        # Bug fix: the URL was previously assigned but never placed in the
        # prompt, so the model received no image while the assertions below
        # expect image-dependent output (" A bee on a pink flower.").
        f"<|begin_of_text|><|begin_of_text|>User:![]({bee_image}){query}<end_of_utterance>\nAssistant:",
        max_new_tokens=10,
        seed=1337,
    )
    assert (
        response.generated_text == " A bee on a pink flower."
    ), f"{repr(response.generated_text)}"
    assert response.details.generated_tokens == 8
    assert response == response_snapshot
|
@ -916,7 +916,7 @@ class Idefics2ForConditionalGeneration(nn.Module):
|
||||
)
|
||||
|
||||
config.quantize = None
|
||||
self.connector = Idefics3Connector(
|
||||
self.connector = Idefics2Connector(
|
||||
prefix=f"{prefix}.model.connector" if prefix else "model.connector",
|
||||
config=config,
|
||||
weights=weights,
|
||||
|
@ -280,8 +280,13 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
|
||||
raise RuntimeError(f"Invalid chunk type {chunk_type}")
|
||||
|
||||
if images:
|
||||
kwargs = {}
|
||||
match processor.image_processor_class:
|
||||
case "Idefics3ImageProcessor":
|
||||
kwargs["return_row_col_info"] = True
|
||||
|
||||
image_inputs = processor.image_processor(
|
||||
images, return_tensors="pt", return_row_col_info=True
|
||||
images, return_tensors="pt", **kwargs
|
||||
)
|
||||
else:
|
||||
image_inputs = None
|
||||
|
Loading…
Reference in New Issue
Block a user