mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
Tweak for multi prompt.
This commit is contained in:
parent
9aa71d61fb
commit
e2f4eed6d6
@ -8,6 +8,7 @@ from huggingface_hub.inference._generated.types.chat_completion import (
|
||||
from openai.types.chat.chat_completion_chunk import (
|
||||
ChatCompletionChunk as OAIChatCompletionChunk,
|
||||
)
|
||||
from openai.types.completion import Completion as OAICompletion
|
||||
import requests
|
||||
|
||||
|
||||
@ -132,6 +133,7 @@ class ResponseComparator(JSONSnapshotExtension):
|
||||
or isinstance(data, ChatCompletionComplete)
|
||||
or isinstance(data, Completion)
|
||||
or isinstance(data, OAIChatCompletionChunk)
|
||||
or isinstance(data, OAICompletion)
|
||||
):
|
||||
data = data.model_dump()
|
||||
elif isinstance(data, ChatCompletionStreamOutput) or isinstance(
|
||||
|
@ -0,0 +1,666 @@
|
||||
[
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " A"
|
||||
}
|
||||
],
|
||||
"created": 1741340006,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " This"
|
||||
}
|
||||
],
|
||||
"created": 1741340006,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " Paris"
|
||||
}
|
||||
],
|
||||
"created": 1741340006,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "us"
|
||||
}
|
||||
],
|
||||
"created": 1741340006,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Beginner"
|
||||
}
|
||||
],
|
||||
"created": 1741340006,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " is"
|
||||
}
|
||||
],
|
||||
"created": 1741340006,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "cul"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "’s"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " a"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "What"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "as"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Guide"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " question"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " is"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "_minus"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " that"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " the"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "cul"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "Deep"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " has"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " capital"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "as"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " puzzled"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " of"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "(s"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " is"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " many"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " France"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "):\n"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " a"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " people"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "?\n"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " subset"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens": 6,
|
||||
"prompt_tokens_details": null,
|
||||
"total_tokens": 16
|
||||
}
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " for"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens": 5,
|
||||
"prompt_tokens_details": null,
|
||||
"total_tokens": 15
|
||||
}
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "The"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens": 8,
|
||||
"prompt_tokens_details": null,
|
||||
"total_tokens": 18
|
||||
}
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": " \"\"\"\n"
|
||||
}
|
||||
],
|
||||
"created": 1741340007,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens": 3,
|
||||
"prompt_tokens_details": null,
|
||||
"total_tokens": 13
|
||||
}
|
||||
}
|
||||
]
|
@ -1,7 +1,10 @@
|
||||
import pytest
|
||||
import requests
|
||||
import json
|
||||
from aiohttp import ClientSession
|
||||
from openai import OpenAI
|
||||
from huggingface_hub import InferenceClient
|
||||
from text_generation.types import Completion
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@ -155,8 +158,8 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
|
||||
async def test_flash_llama_completion_many_prompts_stream(
|
||||
flash_llama_completion, response_snapshot
|
||||
):
|
||||
client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1")
|
||||
stream = client.completion(
|
||||
client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")
|
||||
stream = client.completions.create(
|
||||
model="tgi",
|
||||
prompt=[
|
||||
"What is Deep Learning?",
|
||||
@ -174,7 +177,6 @@ async def test_flash_llama_completion_many_prompts_stream(
|
||||
chunks = []
|
||||
for chunk in stream:
|
||||
chunks.append(chunk)
|
||||
assert "choices" in chunk
|
||||
index = chunk.choices[0].index
|
||||
assert 0 <= index <= 4
|
||||
strings[index] += chunk.choices[0].text
|
||||
|
Loading…
Reference in New Issue
Block a user