Update the flaky mllama test.

2025-06-30 21:10:16 +00:00 · 2025-02-11 17:10:36 +01:00 · 2025-02-11 17:10:36 +01:00 · 8a870b31b9
commit 8a870b31b9
parent 571ac9b507
3 changed files with 19 additions and 18 deletions
--- a/integration-tests/models/snapshots/test_mllama/test_mllama_load.json
+++ b/integration-tests/models/snapshots/test_mllama/test_mllama_load.json
@@ -6,7 +6,7 @@
        "index": 0,
        "logprobs": null,
        "message": {
-          "content": "In a small town, a chicken named Cluck",
+          "content": "A chicken sits on a pile of money, looking",
          "name": null,
          "role": "assistant",
          "tool_calls": null
@@ -14,15 +14,15 @@
        "usage": null
      }
    ],
-    "created": 1738753835,
+    "created": 1739290197,
    "id": "",
    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
    "system_fingerprint": "3.1.1-dev0-native",
    "usage": {
      "completion_tokens": 10,
-      "prompt_tokens": 50,
+      "prompt_tokens": 45,
-      "total_tokens": 60
+      "total_tokens": 55
    }
  },
  {
@@ -32,7 +32,7 @@
        "index": 0,
        "logprobs": null,
        "message": {
-          "content": "In a small town, a chicken named Cluck",
+          "content": "A chicken sits on a pile of money, looking",
          "name": null,
          "role": "assistant",
          "tool_calls": null
@@ -40,15 +40,15 @@
        "usage": null
      }
    ],
-    "created": 1738753835,
+    "created": 1739290197,
    "id": "",
    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
    "system_fingerprint": "3.1.1-dev0-native",
    "usage": {
      "completion_tokens": 10,
-      "prompt_tokens": 50,
+      "prompt_tokens": 45,
-      "total_tokens": 60
+      "total_tokens": 55
    }
  }
 ]
--- a/integration-tests/models/snapshots/test_mllama/test_mllama_simpl.json
+++ b/integration-tests/models/snapshots/test_mllama/test_mllama_simpl.json
@@ -5,7 +5,7 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "In a small town, a chicken named Cluck",
+        "content": "A chicken sits on a pile of money, looking",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@@ -13,14 +13,14 @@
      "usage": null
    }
  ],
-  "created": 1738753833,
+  "created": 1739290152,
  "id": "",
  "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
  "object": "chat.completion",
  "system_fingerprint": "3.1.1-dev0-native",
  "usage": {
    "completion_tokens": 10,
-    "prompt_tokens": 50,
+    "prompt_tokens": 45,
-    "total_tokens": 60
+    "total_tokens": 55
  }
 }
--- a/integration-tests/models/test_mllama.py
+++ b/integration-tests/models/test_mllama.py
@@ -28,7 +28,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
                "content": [
                    {
                        "type": "text",
-                        "text": "Can you tell me a very short story based on the image?",
+                        "text": "Describe the image in 10 words.",
                    },
                    {
                        "type": "image_url",
@@ -43,11 +43,12 @@ async def test_mllama_simpl(mllama, response_snapshot):
    assert response.usage == {
        "completion_tokens": 10,
-        "prompt_tokens": 50,
+        "prompt_tokens": 45,
-        "total_tokens": 60,
+        "total_tokens": 55,
    }
    assert (
-        response.choices[0].message.content == "In a small town, a chicken named Cluck"
+        response.choices[0].message.content
+        == "A chicken sits on a pile of money, looking"
    )
    assert response == response_snapshot
@@ -65,7 +66,7 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
                    "content": [
                        {
                            "type": "text",
-                            "text": "Can you tell me a very short story based on the image?",
+                            "text": "Describe the image in 10 words.",
                        },
                        {
                            "type": "image_url",
@@ -86,7 +87,7 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
    generated_texts = [response.choices[0].message.content for response in responses]
    # XXX: TODO: Fix this test.
-    assert generated_texts[0] == "In a small town, a chicken named Cluck"
+    assert generated_texts[0] == "A chicken sits on a pile of money, looking"
    assert len(generated_texts) == 2
    assert generated_texts, all(
        [text == generated_texts[0] for text in generated_texts]