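Update the Llama tool-calling integration tests: lower max_tokens from 100 to 20 and regenerate the expected outputs and snapshots.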

2025-09-10 11:54:52 +00:00 · 2025-03-11 13:19:38 +01:00 · 2025-03-11 13:19:38 +01:00 · ef4e2685d8
commit ef4e2685d8
parent 03fe626a95
4 changed files with 342 additions and 1256 deletions
--- a/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json
+++ b/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json
@@ -5,20 +5,20 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "I am a helpful assistant!",
+        "content": "I'm an artificial intelligence model known as a large language model (LLM) or conversational AI",
        "role": "assistant",
        "tool_calls": null
      }
    }
  ],
-  "created": 1741263686,
+  "created": 1741693957,
  "id": "",
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "object": "chat.completion",
  "system_fingerprint": "3.1.2-dev0-native",
  "usage": {
-    "completion_tokens": 23,
-    "prompt_tokens": 494,
-    "total_tokens": 517
+    "completion_tokens": 12,
+    "prompt_tokens": 53,
+    "total_tokens": 65
  }
 }
--- a/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json
+++ b/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json
@@ -1,24 +1,4 @@
 [
-  {
-    "choices": [
-      {
-        "delta": {
-          "content": "",
-          "role": "assistant",
-          "tool_calls": null
-        },
-        "finish_reason": null,
-        "index": 0,
-        "logprobs": null
-      }
-    ],
-    "created": 1741364571,
-    "id": "",
-    "model": "meta-llama/Llama-3.1-8B-Instruct",
-    "object": "chat.completion.chunk",
-    "system_fingerprint": "3.1.2-dev0-native",
-    "usage": null
-  },
  {
    "choices": [
      {
@@ -32,7 +12,7 @@
        "logprobs": null
      }
    ],
-    "created": 1741364571,
+    "created": 1741694017,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "chat.completion.chunk",
@@ -43,7 +23,7 @@
    "choices": [
      {
        "delta": {
-          "content": " am",
+          "content": "'m",
          "role": "assistant",
          "tool_calls": null
        },
@@ -52,7 +32,127 @@
        "logprobs": null
      }
    ],
-    "created": 1741364571,
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " an",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " artificial",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " intelligence",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " model",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " known",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " as",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "chat.completion.chunk",
@@ -72,7 +172,7 @@
        "logprobs": null
      }
    ],
-    "created": 1741364571,
+    "created": 1741694017,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "chat.completion.chunk",
@@ -83,7 +183,7 @@
    "choices": [
      {
        "delta": {
-          "content": " helpful",
+          "content": " large",
          "role": "assistant",
          "tool_calls": null
        },
@@ -92,7 +192,7 @@
        "logprobs": null
      }
    ],
-    "created": 1741364571,
+    "created": 1741694017,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "chat.completion.chunk",
@@ -103,7 +203,7 @@
    "choices": [
      {
        "delta": {
-          "content": " assistant",
+          "content": " language",
          "role": "assistant",
          "tool_calls": null
        },
@@ -112,7 +212,7 @@
        "logprobs": null
      }
    ],
-    "created": 1741364571,
+    "created": 1741694017,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "chat.completion.chunk",
@@ -123,7 +223,7 @@
    "choices": [
      {
        "delta": {
-          "content": "!",
+          "content": " model",
          "role": "assistant",
          "tool_calls": null
        },
@@ -132,7 +232,167 @@
        "logprobs": null
      }
    ],
-    "created": 1741364571,
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " (",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "LL",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "M",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": ")",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " or",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " convers",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "ational",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
+    "id": "",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.1.2-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " AI",
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741694017,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "chat.completion.chunk",
--- a/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json
+++ b/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json
--- a/integration-tests/models/test_tools_llama.py
+++ b/integration-tests/models/test_tools_llama.py
@@ -279,7 +279,7 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream(
 ):
    client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1")
    response = client.chat_completion(
-        max_tokens=100,
+        max_tokens=20,
        seed=24,
        tools=tools,
        tool_choice="auto",
@@ -299,7 +299,10 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream(
    content_generated = response.choices[0].message.content
    assert response.choices[0].message.tool_calls is None

-    assert content_generated == "I am a helpful assistant!"
+    assert (
+        content_generated
+        == "I'm an artificial intelligence model known as a large language model (LLM) or conversational AI"
+    )
    assert response == response_snapshot


@@ -310,7 +313,7 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream(
 ):
    client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1")
    stream = client.chat_completion(
-        max_tokens=100,
+        max_tokens=20,
        seed=24,
        tools=tools,
        tool_choice="auto",
@@ -335,7 +338,10 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream(
        assert chunk.choices[0].delta.tool_calls is None

    ######## This is exactly the same as the non streaming case
-    assert content_generated == "I am a helpful assistant!"
+    assert (
+        content_generated
+        == "I'm an artificial intelligence model known as a large language model (LLM) or conversational AI"
+    )
    assert chunks == response_snapshot


@@ -346,7 +352,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto(
 ):
    client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1")
    stream = client.chat_completion(
-        max_tokens=100,
+        max_tokens=20,
        seed=24,
        tools=tools,
        tool_choice="auto",
@@ -372,7 +378,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto(

    assert (
        content_generated
-        == "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle."
+        == "Once upon a time, in a vibrant ocean filled with coral reefs and schools of shimmering fish,"
    )
    assert chunks == response_snapshot