diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index be73c60a..f7852441 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -8,6 +8,7 @@ from huggingface_hub.inference._generated.types.chat_completion import ( from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OAIChatCompletionChunk, ) +from openai.types.completion import Completion as OAICompletion import requests @@ -132,6 +133,7 @@ class ResponseComparator(JSONSnapshotExtension): or isinstance(data, ChatCompletionComplete) or isinstance(data, Completion) or isinstance(data, OAIChatCompletionChunk) + or isinstance(data, OAICompletion) ): data = data.model_dump() elif isinstance(data, ChatCompletionStreamOutput) or isinstance( diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json new file mode 100644 index 00000000..ad4ee6e0 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json @@ -0,0 +1,666 @@ +[ + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": " A" + } + ], + "created": 1741340006, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " This" + } + ], + "created": 1741340006, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": " Paris" + } + ], + "created": 1741340006, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "us" + } + ], + "created": 1741340006, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": " Beginner" + } + ], + "created": 1741340006, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " is" + } + ], + "created": 1741340006, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": "\n" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "cul" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": "’s" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " a" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": "What" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "as" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": " Guide" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " question" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": " is" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "_minus" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": "\n" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " that" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": " the" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "cul" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": "Deep" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " has" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": " capital" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "as" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": " learning" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " puzzled" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": " of" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "(s" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": " is" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " many" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": " France" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": "):\n" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 0, + "logprobs": null, + "text": " a" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 1, + "logprobs": null, + "text": " people" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 2, + "logprobs": null, + "text": "?\n" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "", + "index": 3, + "logprobs": null, + "text": " " + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "text": " subset" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": { + "completion_tokens": 10, + "completion_tokens_details": null, + "prompt_tokens": 6, + "prompt_tokens_details": null, + "total_tokens": 16 + } + }, + { + "choices": [ + { + "finish_reason": "length", + "index": 1, + "logprobs": null, + "text": " for" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": { + "completion_tokens": 10, + "completion_tokens_details": null, + "prompt_tokens": 5, + "prompt_tokens_details": null, + "total_tokens": 15 + } + }, + { + "choices": [ + { + "finish_reason": "length", + "index": 2, + "logprobs": null, + "text": "The" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": { + "completion_tokens": 10, + "completion_tokens_details": null, + "prompt_tokens": 8, + "prompt_tokens_details": null, + "total_tokens": 18 + } + }, + { + "choices": [ + { + "finish_reason": "length", + "index": 3, + "logprobs": null, + "text": " \"\"\"\n" + } + ], + "created": 1741340007, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "text_completion", + "system_fingerprint": "3.1.2-dev0-native", + "usage": { + "completion_tokens": 10, + "completion_tokens_details": null, + "prompt_tokens": 3, + "prompt_tokens_details": null, + "total_tokens": 13 + } + } +] diff --git a/integration-tests/models/test_completion_prompts.py b/integration-tests/models/test_completion_prompts.py index 90e26286..5b5dc350 100644 --- a/integration-tests/models/test_completion_prompts.py +++ b/integration-tests/models/test_completion_prompts.py @@ -1,7 +1,10 @@ import pytest import requests +import json +from aiohttp import ClientSession from openai import OpenAI from huggingface_hub import InferenceClient +from text_generation.types import Completion @pytest.fixture(scope="module") @@ -155,8 +158,8 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn async def test_flash_llama_completion_many_prompts_stream( flash_llama_completion, response_snapshot ): - client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1") - stream = client.completion( + client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1") + stream = client.completions.create( model="tgi", prompt=[ "What is Deep Learning?", @@ -174,7 +177,6 @@ async def test_flash_llama_completion_many_prompts_stream( chunks = [] for chunk in stream: chunks.append(chunk) - assert "choices" in chunk index = chunk.choices[0].index assert 0 <= index <= 4 strings[index] += chunk.choices[0].text