Tweak for multi prompt.

This commit is contained in:
Nicolas Patry 2025-03-07 10:34:33 +01:00
parent 9aa71d61fb
commit e2f4eed6d6
No known key found for this signature in database
GPG Key ID: 4242CEF24CB6DBF9
3 changed files with 673 additions and 3 deletions

View File

@ -8,6 +8,7 @@ from huggingface_hub.inference._generated.types.chat_completion import (
from openai.types.chat.chat_completion_chunk import ( from openai.types.chat.chat_completion_chunk import (
ChatCompletionChunk as OAIChatCompletionChunk, ChatCompletionChunk as OAIChatCompletionChunk,
) )
from openai.types.completion import Completion as OAICompletion
import requests import requests
@ -132,6 +133,7 @@ class ResponseComparator(JSONSnapshotExtension):
or isinstance(data, ChatCompletionComplete) or isinstance(data, ChatCompletionComplete)
or isinstance(data, Completion) or isinstance(data, Completion)
or isinstance(data, OAIChatCompletionChunk) or isinstance(data, OAIChatCompletionChunk)
or isinstance(data, OAICompletion)
): ):
data = data.model_dump() data = data.model_dump()
elif isinstance(data, ChatCompletionStreamOutput) or isinstance( elif isinstance(data, ChatCompletionStreamOutput) or isinstance(

View File

@ -0,0 +1,666 @@
[
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " A"
}
],
"created": 1741340006,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " This"
}
],
"created": 1741340006,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": " Paris"
}
],
"created": 1741340006,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "us"
}
],
"created": 1741340006,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " Beginner"
}
],
"created": 1741340006,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " is"
}
],
"created": 1741340006,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": "\n"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "cul"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "s"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " a"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": "What"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "as"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " Guide"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " question"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": " is"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "_minus"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "\n"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " that"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": " the"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "cul"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "Deep"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " has"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": " capital"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "as"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " learning"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " puzzled"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": " of"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "(s"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " is"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " many"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": " France"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": "):\n"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 1,
"logprobs": null,
"text": " people"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 2,
"logprobs": null,
"text": "?\n"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "",
"index": 3,
"logprobs": null,
"text": " "
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " subset"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 6,
"prompt_tokens_details": null,
"total_tokens": 16
}
},
{
"choices": [
{
"finish_reason": "length",
"index": 1,
"logprobs": null,
"text": " for"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 5,
"prompt_tokens_details": null,
"total_tokens": 15
}
},
{
"choices": [
{
"finish_reason": "length",
"index": 2,
"logprobs": null,
"text": "The"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 8,
"prompt_tokens_details": null,
"total_tokens": 18
}
},
{
"choices": [
{
"finish_reason": "length",
"index": 3,
"logprobs": null,
"text": " \"\"\"\n"
}
],
"created": 1741340007,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion",
"system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 3,
"prompt_tokens_details": null,
"total_tokens": 13
}
}
]

View File

@ -1,7 +1,10 @@
import pytest import pytest
import requests import requests
import json
from aiohttp import ClientSession
from openai import OpenAI from openai import OpenAI
from huggingface_hub import InferenceClient from huggingface_hub import InferenceClient
from text_generation.types import Completion
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -155,8 +158,8 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
async def test_flash_llama_completion_many_prompts_stream( async def test_flash_llama_completion_many_prompts_stream(
flash_llama_completion, response_snapshot flash_llama_completion, response_snapshot
): ):
client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1") client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")
stream = client.completion( stream = client.completions.create(
model="tgi", model="tgi",
prompt=[ prompt=[
"What is Deep Learning?", "What is Deep Learning?",
@ -174,7 +177,6 @@ async def test_flash_llama_completion_many_prompts_stream(
chunks = [] chunks = []
for chunk in stream: for chunk in stream:
chunks.append(chunk) chunks.append(chunk)
assert "choices" in chunk
index = chunk.choices[0].index index = chunk.choices[0].index
assert 0 <= index <= 4 assert 0 <= index <= 4
strings[index] += chunk.choices[0].text strings[index] += chunk.choices[0].text