Tweak for multi prompt.

2025-09-09 19:34:53 +00:00 · 2025-03-07 10:34:33 +01:00 · 2025-03-07 10:34:33 +01:00 · e2f4eed6d6
commit e2f4eed6d6
parent 9aa71d61fb
3 changed files with 673 additions and 3 deletions
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -8,6 +8,7 @@ from huggingface_hub.inference._generated.types.chat_completion import (
 from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk as OAIChatCompletionChunk,
 )
 from openai.types.completion import Completion as OAICompletion
 import requests
@@ -132,6 +133,7 @@ class ResponseComparator(JSONSnapshotExtension):
            or isinstance(data, ChatCompletionComplete)
            or isinstance(data, Completion)
            or isinstance(data, OAIChatCompletionChunk)
            or isinstance(data, OAICompletion)
        ):
            data = data.model_dump()
        elif isinstance(data, ChatCompletionStreamOutput) or isinstance(
--- a/integration-tests/models/snapshots/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
+++ b/integration-tests/models/snapshots/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
@@ -0,0 +1,666 @@
 [
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": " A"
      }
    ],
    "created": 1741340006,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " This"
      }
    ],
    "created": 1741340006,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": " Paris"
      }
    ],
    "created": 1741340006,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "us"
      }
    ],
    "created": 1741340006,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": " Beginner"
      }
    ],
    "created": 1741340006,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " is"
      }
    ],
    "created": 1741340006,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": "\n"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "cul"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": "’s"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " a"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": "What"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "as"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": " Guide"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " question"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": " is"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "_minus"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": "\n"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " that"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": " the"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "cul"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": "Deep"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " has"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": " capital"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "as"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": " learning"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " puzzled"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": " of"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "(s"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": " is"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " many"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": " France"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "):\n"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 0,
        "logprobs": null,
        "text": " a"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 1,
        "logprobs": null,
        "text": " people"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 2,
        "logprobs": null,
        "text": "?\n"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "",
        "index": 3,
        "logprobs": null,
        "text": "   "
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": null
  },
  {
    "choices": [
      {
        "finish_reason": "length",
        "index": 0,
        "logprobs": null,
        "text": " subset"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "completion_tokens_details": null,
      "prompt_tokens": 6,
      "prompt_tokens_details": null,
      "total_tokens": 16
    }
  },
  {
    "choices": [
      {
        "finish_reason": "length",
        "index": 1,
        "logprobs": null,
        "text": " for"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "completion_tokens_details": null,
      "prompt_tokens": 5,
      "prompt_tokens_details": null,
      "total_tokens": 15
    }
  },
  {
    "choices": [
      {
        "finish_reason": "length",
        "index": 2,
        "logprobs": null,
        "text": "The"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "completion_tokens_details": null,
      "prompt_tokens": 8,
      "prompt_tokens_details": null,
      "total_tokens": 18
    }
  },
  {
    "choices": [
      {
        "finish_reason": "length",
        "index": 3,
        "logprobs": null,
        "text": " \"\"\"\n"
      }
    ],
    "created": 1741340007,
    "id": "",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "object": "text_completion",
    "system_fingerprint": "3.1.2-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "completion_tokens_details": null,
      "prompt_tokens": 3,
      "prompt_tokens_details": null,
      "total_tokens": 13
    }
  }
 ]
--- a/integration-tests/models/test_completion_prompts.py
+++ b/integration-tests/models/test_completion_prompts.py
@@ -1,7 +1,10 @@
 import pytest
 import requests
 import json
 from aiohttp import ClientSession
 from openai import OpenAI
 from huggingface_hub import InferenceClient
 from text_generation.types import Completion
@pytest.fixture(scope="module")
@@ -155,8 +158,8 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
 async def test_flash_llama_completion_many_prompts_stream(
    flash_llama_completion, response_snapshot
 ):
-    client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1")
+    client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")
-    stream = client.completion(
+    stream = client.completions.create(
        model="tgi",
        prompt=[
            "What is Deep Learning?",
@@ -174,7 +177,6 @@ async def test_flash_llama_completion_many_prompts_stream(
    chunks = []
    for chunk in stream:
        chunks.append(chunk)
        assert "choices" in chunk
        index = chunk.choices[0].index
        assert 0 <= index <= 4
        strings[index] += chunk.choices[0].text