mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
Pr 3003 ci branch (#3007)
* change ChatCompletionChunk to align with "OpenAI Chat Completions streaming API" Moving after tool_calls2 Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> add in Buffering.. Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> fix: handle usage outside of stream state and add tests Simplifying everything quite a bit. Remove the unused model_dump. Clippy. Clippy ? Ruff. Uppgrade the flake for latest transformers. Upgrade after rebase. Remove potential footgun. Fix completion test. * Clippy. * Tweak for multi prompt. * Ruff. * Update the snapshot a bit. --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent
124398fa57
commit
dc5f05f8e6
127
flake.lock
127
flake.lock
@ -2,10 +2,16 @@
|
|||||||
"nodes": {
|
"nodes": {
|
||||||
"cachix": {
|
"cachix": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"devenv": ["crate2nix"],
|
"devenv": [
|
||||||
"flake-compat": ["crate2nix"],
|
"crate2nix"
|
||||||
|
],
|
||||||
|
"flake-compat": [
|
||||||
|
"crate2nix"
|
||||||
|
],
|
||||||
"nixpkgs": "nixpkgs",
|
"nixpkgs": "nixpkgs",
|
||||||
"pre-commit-hooks": ["crate2nix"]
|
"pre-commit-hooks": [
|
||||||
|
"crate2nix"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1709700175,
|
"lastModified": 1709700175,
|
||||||
@ -24,10 +30,19 @@
|
|||||||
},
|
},
|
||||||
"cachix_2": {
|
"cachix_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"devenv": ["crate2nix", "crate2nix_stable"],
|
"devenv": [
|
||||||
"flake-compat": ["crate2nix", "crate2nix_stable"],
|
"crate2nix",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
"nixpkgs": "nixpkgs_2",
|
"nixpkgs": "nixpkgs_2",
|
||||||
"pre-commit-hooks": ["crate2nix", "crate2nix_stable"]
|
"pre-commit-hooks": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1716549461,
|
"lastModified": 1716549461,
|
||||||
@ -46,8 +61,16 @@
|
|||||||
},
|
},
|
||||||
"cachix_3": {
|
"cachix_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"devenv": ["crate2nix", "crate2nix_stable", "crate2nix_stable"],
|
"devenv": [
|
||||||
"flake-compat": ["crate2nix", "crate2nix_stable", "crate2nix_stable"],
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"crate2nix_stable"
|
||||||
|
],
|
||||||
"nixpkgs": "nixpkgs_3",
|
"nixpkgs": "nixpkgs_3",
|
||||||
"pre-commit-hooks": [
|
"pre-commit-hooks": [
|
||||||
"crate2nix",
|
"crate2nix",
|
||||||
@ -78,15 +101,18 @@
|
|||||||
"flake-compat": "flake-compat_3",
|
"flake-compat": "flake-compat_3",
|
||||||
"flake-parts": "flake-parts_3",
|
"flake-parts": "flake-parts_3",
|
||||||
"nix-test-runner": "nix-test-runner_3",
|
"nix-test-runner": "nix-test-runner_3",
|
||||||
"nixpkgs": ["tgi-nix", "nixpkgs"],
|
"nixpkgs": [
|
||||||
|
"tgi-nix",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
"pre-commit-hooks": "pre-commit-hooks_3"
|
"pre-commit-hooks": "pre-commit-hooks_3"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1734429562,
|
"lastModified": 1739473963,
|
||||||
"narHash": "sha256-V2XNs3Ir8WXNHdocfzkR/fu0FzkZ9uTDJkVecxJrGmQ=",
|
"narHash": "sha256-ItAhpjNUzEWd/cgZVyW/jvoGbCec4TK29e1Mnmn1oJE=",
|
||||||
"owner": "nix-community",
|
"owner": "nix-community",
|
||||||
"repo": "crate2nix",
|
"repo": "crate2nix",
|
||||||
"rev": "8537c2d7cb623679aaeff62c4c4c43a91566ab09",
|
"rev": "be31feae9a82c225c0fd1bdf978565dc452a483a",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@ -193,7 +219,11 @@
|
|||||||
"devshell_2": {
|
"devshell_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-utils": "flake-utils_3",
|
"flake-utils": "flake-utils_3",
|
||||||
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1717408969,
|
"lastModified": 1717408969,
|
||||||
@ -212,7 +242,10 @@
|
|||||||
"devshell_3": {
|
"devshell_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-utils": "flake-utils_4",
|
"flake-utils": "flake-utils_4",
|
||||||
"nixpkgs": ["crate2nix", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1711099426,
|
"lastModified": 1711099426,
|
||||||
@ -310,7 +343,11 @@
|
|||||||
},
|
},
|
||||||
"flake-parts_2": {
|
"flake-parts_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs-lib": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
"nixpkgs-lib": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1719745305,
|
"lastModified": 1719745305,
|
||||||
@ -328,7 +365,10 @@
|
|||||||
},
|
},
|
||||||
"flake-parts_3": {
|
"flake-parts_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs-lib": ["crate2nix", "nixpkgs"]
|
"nixpkgs-lib": [
|
||||||
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1712014858,
|
"lastModified": 1712014858,
|
||||||
@ -519,7 +559,11 @@
|
|||||||
},
|
},
|
||||||
"gitignore_3": {
|
"gitignore_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs": ["crate2nix", "pre-commit-hooks", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"crate2nix",
|
||||||
|
"pre-commit-hooks",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1709087332,
|
"lastModified": 1709087332,
|
||||||
@ -726,10 +770,22 @@
|
|||||||
},
|
},
|
||||||
"pre-commit-hooks_2": {
|
"pre-commit-hooks_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-compat": ["crate2nix", "crate2nix_stable", "flake-compat"],
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"flake-compat"
|
||||||
|
],
|
||||||
"gitignore": "gitignore_2",
|
"gitignore": "gitignore_2",
|
||||||
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"],
|
"nixpkgs": [
|
||||||
"nixpkgs-stable": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
|
"nixpkgs-stable": [
|
||||||
|
"crate2nix",
|
||||||
|
"crate2nix_stable",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1719259945,
|
"lastModified": 1719259945,
|
||||||
@ -747,11 +803,20 @@
|
|||||||
},
|
},
|
||||||
"pre-commit-hooks_3": {
|
"pre-commit-hooks_3": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-compat": ["crate2nix", "flake-compat"],
|
"flake-compat": [
|
||||||
|
"crate2nix",
|
||||||
|
"flake-compat"
|
||||||
|
],
|
||||||
"flake-utils": "flake-utils_5",
|
"flake-utils": "flake-utils_5",
|
||||||
"gitignore": "gitignore_3",
|
"gitignore": "gitignore_3",
|
||||||
"nixpkgs": ["crate2nix", "nixpkgs"],
|
"nixpkgs": [
|
||||||
"nixpkgs-stable": ["crate2nix", "nixpkgs"]
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
|
"nixpkgs-stable": [
|
||||||
|
"crate2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1712055707,
|
"lastModified": 1712055707,
|
||||||
@ -772,21 +837,27 @@
|
|||||||
"crate2nix": "crate2nix",
|
"crate2nix": "crate2nix",
|
||||||
"flake-utils": "flake-utils_6",
|
"flake-utils": "flake-utils_6",
|
||||||
"nix-filter": "nix-filter",
|
"nix-filter": "nix-filter",
|
||||||
"nixpkgs": ["tgi-nix", "nixpkgs"],
|
"nixpkgs": [
|
||||||
|
"tgi-nix",
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
"rust-overlay": "rust-overlay",
|
"rust-overlay": "rust-overlay",
|
||||||
"tgi-nix": "tgi-nix"
|
"tgi-nix": "tgi-nix"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"rust-overlay": {
|
"rust-overlay": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs": ["tgi-nix", "nixpkgs"]
|
"nixpkgs": [
|
||||||
|
"tgi-nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1738549608,
|
"lastModified": 1741141853,
|
||||||
"narHash": "sha256-GdyT9QEUSx5k/n8kILuNy83vxxdyUfJ8jL5mMpQZWfw=",
|
"narHash": "sha256-FauVtC+FbOgkKpGVuQTNxSqrvgbmVc7hFkjn/DacwMo=",
|
||||||
"owner": "oxalica",
|
"owner": "oxalica",
|
||||||
"repo": "rust-overlay",
|
"repo": "rust-overlay",
|
||||||
"rev": "35c6f8c4352f995ecd53896200769f80a3e8f22d",
|
"rev": "02edad1f19d6dec824e0812e4cdc0aa7930ff8ae",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -8,6 +8,7 @@ from huggingface_hub.inference._generated.types.chat_completion import (
|
|||||||
from openai.types.chat.chat_completion_chunk import (
|
from openai.types.chat.chat_completion_chunk import (
|
||||||
ChatCompletionChunk as OAIChatCompletionChunk,
|
ChatCompletionChunk as OAIChatCompletionChunk,
|
||||||
)
|
)
|
||||||
|
from openai.types.completion import Completion as OAICompletion
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
@ -39,7 +40,6 @@ from typing import Dict, List, Optional
|
|||||||
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
|
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
|
||||||
from docker.errors import NotFound
|
from docker.errors import NotFound
|
||||||
from syrupy.extensions.json import JSONSnapshotExtension
|
from syrupy.extensions.json import JSONSnapshotExtension
|
||||||
|
|
||||||
from text_generation import AsyncClient
|
from text_generation import AsyncClient
|
||||||
from text_generation.types import (
|
from text_generation.types import (
|
||||||
BestOfSequence,
|
BestOfSequence,
|
||||||
@ -133,6 +133,7 @@ class ResponseComparator(JSONSnapshotExtension):
|
|||||||
or isinstance(data, ChatCompletionComplete)
|
or isinstance(data, ChatCompletionComplete)
|
||||||
or isinstance(data, Completion)
|
or isinstance(data, Completion)
|
||||||
or isinstance(data, OAIChatCompletionChunk)
|
or isinstance(data, OAIChatCompletionChunk)
|
||||||
|
or isinstance(data, OAICompletion)
|
||||||
):
|
):
|
||||||
data = data.model_dump()
|
data = data.model_dump()
|
||||||
elif isinstance(data, ChatCompletionStreamOutput) or isinstance(
|
elif isinstance(data, ChatCompletionStreamOutput) or isinstance(
|
||||||
|
@ -0,0 +1,62 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "OK",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265520,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "!",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265520,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265520,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
]
|
@ -0,0 +1,75 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "OK",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741266005,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "!",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741266005,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741266005,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [],
|
||||||
|
"created": 1741266005,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 3,
|
||||||
|
"prompt_tokens": 39,
|
||||||
|
"total_tokens": 42
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
@ -0,0 +1,71 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "OK",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265134,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "!",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265134,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265134,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
]
|
@ -0,0 +1,87 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "OK",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265133,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "!",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265133,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741265133,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [],
|
||||||
|
"created": 1741265133,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 3,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens": 39,
|
||||||
|
"prompt_tokens_details": null,
|
||||||
|
"total_tokens": 42
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
@ -1,17 +1,17 @@
|
|||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
|
||||||
"finish_reason": "length",
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null,
|
|
||||||
"text": " A Beginner’s Guide\nDeep learning is a subset"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"finish_reason": "length",
|
"finish_reason": "length",
|
||||||
"index": 1,
|
"index": 1,
|
||||||
"logprobs": null,
|
"logprobs": null,
|
||||||
"text": " This is a question that has puzzled many people for"
|
"text": " This is a question that has puzzled many people for"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"finish_reason": "length",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " A Beginner’s Guide\nDeep learning is a subset"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"finish_reason": "length",
|
"finish_reason": "length",
|
||||||
"index": 3,
|
"index": 3,
|
||||||
@ -25,11 +25,11 @@
|
|||||||
"text": " Paris\nWhat is the capital of France?\nThe"
|
"text": " Paris\nWhat is the capital of France?\nThe"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725877154,
|
"created": 1741264813,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 40,
|
"completion_tokens": 40,
|
||||||
"prompt_tokens": 22,
|
"prompt_tokens": 22,
|
||||||
|
@ -8,11 +8,12 @@
|
|||||||
"text": " A"
|
"text": " A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340006,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -23,11 +24,12 @@
|
|||||||
"text": " This"
|
"text": " This"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340006,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -38,11 +40,12 @@
|
|||||||
"text": " Paris"
|
"text": " Paris"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340006,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -53,11 +56,12 @@
|
|||||||
"text": "us"
|
"text": "us"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340006,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -68,11 +72,12 @@
|
|||||||
"text": " Beginner"
|
"text": " Beginner"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340006,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -83,11 +88,12 @@
|
|||||||
"text": " is"
|
"text": " is"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340006,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -98,11 +104,12 @@
|
|||||||
"text": "\n"
|
"text": "\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -113,11 +120,12 @@
|
|||||||
"text": "cul"
|
"text": "cul"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -128,11 +136,12 @@
|
|||||||
"text": "’s"
|
"text": "’s"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -143,11 +152,12 @@
|
|||||||
"text": " a"
|
"text": " a"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -158,11 +168,12 @@
|
|||||||
"text": "What"
|
"text": "What"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -173,11 +184,12 @@
|
|||||||
"text": "as"
|
"text": "as"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -188,11 +200,12 @@
|
|||||||
"text": " Guide"
|
"text": " Guide"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -203,11 +216,12 @@
|
|||||||
"text": " question"
|
"text": " question"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -218,11 +232,12 @@
|
|||||||
"text": " is"
|
"text": " is"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -233,11 +248,12 @@
|
|||||||
"text": "_minus"
|
"text": "_minus"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -248,11 +264,12 @@
|
|||||||
"text": "\n"
|
"text": "\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -263,11 +280,12 @@
|
|||||||
"text": " that"
|
"text": " that"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -278,11 +296,12 @@
|
|||||||
"text": " the"
|
"text": " the"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -293,11 +312,12 @@
|
|||||||
"text": "cul"
|
"text": "cul"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -308,11 +328,12 @@
|
|||||||
"text": "Deep"
|
"text": "Deep"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -323,11 +344,12 @@
|
|||||||
"text": " has"
|
"text": " has"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -338,11 +360,12 @@
|
|||||||
"text": " capital"
|
"text": " capital"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -353,11 +376,12 @@
|
|||||||
"text": "as"
|
"text": "as"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -368,11 +392,12 @@
|
|||||||
"text": " learning"
|
"text": " learning"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -383,11 +408,12 @@
|
|||||||
"text": " puzzled"
|
"text": " puzzled"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -398,11 +424,12 @@
|
|||||||
"text": " of"
|
"text": " of"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -413,11 +440,12 @@
|
|||||||
"text": "(s"
|
"text": "(s"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -428,11 +456,12 @@
|
|||||||
"text": " is"
|
"text": " is"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -443,11 +472,12 @@
|
|||||||
"text": " many"
|
"text": " many"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -458,11 +488,12 @@
|
|||||||
"text": " France"
|
"text": " France"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -473,11 +504,12 @@
|
|||||||
"text": "):\n"
|
"text": "):\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -488,11 +520,12 @@
|
|||||||
"text": " a"
|
"text": " a"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -503,11 +536,12 @@
|
|||||||
"text": " people"
|
"text": " people"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -518,11 +552,12 @@
|
|||||||
"text": "?\n"
|
"text": "?\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -533,11 +568,12 @@
|
|||||||
"text": " "
|
"text": " "
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -548,11 +584,18 @@
|
|||||||
"text": " subset"
|
"text": " subset"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 10,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens": 6,
|
||||||
|
"prompt_tokens_details": null,
|
||||||
|
"total_tokens": 16
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -563,11 +606,18 @@
|
|||||||
"text": " for"
|
"text": " for"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 10,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens": 5,
|
||||||
|
"prompt_tokens_details": null,
|
||||||
|
"total_tokens": 15
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -578,11 +628,18 @@
|
|||||||
"text": "The"
|
"text": "The"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 10,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens": 8,
|
||||||
|
"prompt_tokens_details": null,
|
||||||
|
"total_tokens": 18
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
@ -593,10 +650,17 @@
|
|||||||
"text": " \"\"\"\n"
|
"text": " \"\"\"\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725883643,
|
"created": 1741340007,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native"
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 10,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens": 3,
|
||||||
|
"prompt_tokens_details": null,
|
||||||
|
"total_tokens": 13
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -7,11 +7,11 @@
|
|||||||
"text": " A Beginner’s Guide\nDeep learning is a subset"
|
"text": " A Beginner’s Guide\nDeep learning is a subset"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1725876621,
|
"created": 1741264812,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "text_completion",
|
||||||
"system_fingerprint": "2.2.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 10,
|
"completion_tokens": 10,
|
||||||
"prompt_tokens": 6,
|
"prompt_tokens": 6,
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338471,
|
"created": 1741373593,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -32,7 +32,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338471,
|
"created": 1741373593,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -52,7 +52,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338471,
|
"created": 1741373593,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -72,7 +72,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338471,
|
"created": 1741373594,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -92,7 +92,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338472,
|
"created": 1741373594,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -112,7 +112,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338472,
|
"created": 1741373594,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -132,7 +132,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338472,
|
"created": 1741373594,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -152,7 +152,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338472,
|
"created": 1741373594,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -172,7 +172,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338472,
|
"created": 1741373594,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -192,7 +192,16 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741338472,
|
"created": 1741373594,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [],
|
||||||
|
"created": 1741373594,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
|
15
integration-tests/models/test_chat_stream_options.py
Normal file
15
integration-tests/models/test_chat_stream_options.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def chat_handle(launcher):
|
||||||
|
with launcher(
|
||||||
|
"meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
) as handle:
|
||||||
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
async def chat_client(chat_handle):
|
||||||
|
await chat_handle.health(300)
|
||||||
|
return chat_handle.client
|
@ -1,11 +1,8 @@
|
|||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
import json
|
from openai import OpenAI
|
||||||
from aiohttp import ClientSession
|
|
||||||
from huggingface_hub import InferenceClient
|
from huggingface_hub import InferenceClient
|
||||||
|
|
||||||
from text_generation.types import Completion
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def flash_llama_completion_handle(launcher):
|
def flash_llama_completion_handle(launcher):
|
||||||
@ -73,7 +70,6 @@ async def test_flash_llama_completion_stream_usage(
|
|||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
# remove "data:"
|
# remove "data:"
|
||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
print(f"Chunk {chunk}")
|
|
||||||
if len(chunk.choices) == 1:
|
if len(chunk.choices) == 1:
|
||||||
index = chunk.choices[0].index
|
index = chunk.choices[0].index
|
||||||
assert index == 0
|
assert index == 0
|
||||||
@ -158,47 +154,29 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
|
|||||||
async def test_flash_llama_completion_many_prompts_stream(
|
async def test_flash_llama_completion_many_prompts_stream(
|
||||||
flash_llama_completion, response_snapshot
|
flash_llama_completion, response_snapshot
|
||||||
):
|
):
|
||||||
request = {
|
client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")
|
||||||
"model": "tgi",
|
stream = client.completions.create(
|
||||||
"prompt": [
|
model="tgi",
|
||||||
|
prompt=[
|
||||||
"What is Deep Learning?",
|
"What is Deep Learning?",
|
||||||
"Is water wet?",
|
"Is water wet?",
|
||||||
"What is the capital of France?",
|
"What is the capital of France?",
|
||||||
"def mai",
|
"def mai",
|
||||||
],
|
],
|
||||||
"max_tokens": 10,
|
max_tokens=10,
|
||||||
"seed": 0,
|
seed=0,
|
||||||
"temperature": 0.0,
|
temperature=0.0,
|
||||||
"stream": True,
|
stream=True,
|
||||||
}
|
)
|
||||||
|
|
||||||
url = f"{flash_llama_completion.base_url}/v1/completions"
|
|
||||||
|
|
||||||
chunks = []
|
|
||||||
strings = [""] * 4
|
strings = [""] * 4
|
||||||
async with ClientSession(headers=flash_llama_completion.headers) as session:
|
chunks = []
|
||||||
async with session.post(url, json=request) as response:
|
for chunk in stream:
|
||||||
# iterate over the stream
|
chunks.append(chunk)
|
||||||
async for chunk in response.content.iter_any():
|
index = chunk.choices[0].index
|
||||||
# remove "data:"
|
assert 0 <= index <= 4
|
||||||
chunk = chunk.decode().split("\n\n")
|
strings[index] += chunk.choices[0].text
|
||||||
# remove "data:" if present
|
|
||||||
chunk = [c.replace("data:", "") for c in chunk]
|
|
||||||
# remove empty strings
|
|
||||||
chunk = [c for c in chunk if c]
|
|
||||||
# remove completion marking chunk
|
|
||||||
chunk = [c for c in chunk if c != " [DONE]"]
|
|
||||||
# parse json
|
|
||||||
chunk = [json.loads(c) for c in chunk]
|
|
||||||
|
|
||||||
for c in chunk:
|
|
||||||
chunks.append(Completion(**c))
|
|
||||||
assert "choices" in c
|
|
||||||
index = c["choices"][0]["index"]
|
|
||||||
assert 0 <= index <= 4
|
|
||||||
strings[index] += c["choices"][0]["text"]
|
|
||||||
|
|
||||||
assert response.status == 200
|
|
||||||
assert list(strings) == [
|
assert list(strings) == [
|
||||||
" A Beginner’s Guide\nDeep learning is a subset",
|
" A Beginner’s Guide\nDeep learning is a subset",
|
||||||
" This is a question that has puzzled many people for",
|
" This is a question that has puzzled many people for",
|
||||||
@ -206,3 +184,92 @@ async def test_flash_llama_completion_many_prompts_stream(
|
|||||||
'usculas_minusculas(s):\n """\n',
|
'usculas_minusculas(s):\n """\n',
|
||||||
]
|
]
|
||||||
assert chunks == response_snapshot
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
|
async def test_chat_openai_usage(flash_llama_completion, response_snapshot):
|
||||||
|
client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")
|
||||||
|
|
||||||
|
stream = client.chat.completions.create(
|
||||||
|
model="tgi",
|
||||||
|
messages=[{"role": "user", "content": "Say 'OK!'"}],
|
||||||
|
stream=True,
|
||||||
|
max_tokens=10,
|
||||||
|
seed=42,
|
||||||
|
stream_options={"include_usage": True},
|
||||||
|
)
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
for chunk in stream:
|
||||||
|
chunks.append(chunk)
|
||||||
|
for chunk in chunks[:-1]:
|
||||||
|
assert chunk.usage is None
|
||||||
|
for chunk in chunks[-1:]:
|
||||||
|
assert chunk.usage is not None
|
||||||
|
|
||||||
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
|
async def test_chat_openai_nousage(flash_llama_completion, response_snapshot):
|
||||||
|
client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")
|
||||||
|
|
||||||
|
stream = client.chat.completions.create(
|
||||||
|
model="tgi",
|
||||||
|
messages=[{"role": "user", "content": "Say 'OK!'"}],
|
||||||
|
stream=True,
|
||||||
|
max_tokens=10,
|
||||||
|
seed=42,
|
||||||
|
stream_options={"include_usage": False},
|
||||||
|
)
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
for chunk in stream:
|
||||||
|
assert chunk.usage is None
|
||||||
|
chunks.append(chunk)
|
||||||
|
|
||||||
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
|
async def test_chat_hfhub_usage(flash_llama_completion, response_snapshot):
|
||||||
|
client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1")
|
||||||
|
stream = client.chat_completion(
|
||||||
|
model="tgi",
|
||||||
|
messages=[{"role": "user", "content": "Say 'OK!'"}],
|
||||||
|
stream=True,
|
||||||
|
max_tokens=10,
|
||||||
|
seed=42,
|
||||||
|
stream_options={"include_usage": True},
|
||||||
|
)
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
for chunk in stream:
|
||||||
|
chunks.append(chunk)
|
||||||
|
|
||||||
|
for chunk in chunks[:-1]:
|
||||||
|
assert chunk.usage is None
|
||||||
|
for chunk in chunks[-1:]:
|
||||||
|
assert chunk.usage is not None
|
||||||
|
|
||||||
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
|
async def test_chat_hfhub_nousage(flash_llama_completion, response_snapshot):
|
||||||
|
client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1")
|
||||||
|
stream = client.chat_completion(
|
||||||
|
model="tgi",
|
||||||
|
messages=[{"role": "user", "content": "Say 'OK!'"}],
|
||||||
|
stream=True,
|
||||||
|
max_tokens=10,
|
||||||
|
seed=42,
|
||||||
|
stream_options={"include_usage": False},
|
||||||
|
)
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
for chunk in stream:
|
||||||
|
assert chunk.usage is None
|
||||||
|
chunks.append(chunk)
|
||||||
|
|
||||||
|
assert chunks == response_snapshot
|
||||||
|
@ -764,7 +764,6 @@ impl ChatCompletionChunk {
|
|||||||
created: u64,
|
created: u64,
|
||||||
logprobs: Option<ChatCompletionLogprobs>,
|
logprobs: Option<ChatCompletionLogprobs>,
|
||||||
finish_reason: Option<String>,
|
finish_reason: Option<String>,
|
||||||
usage: Option<Usage>,
|
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let delta = match (delta, tool_calls) {
|
let delta = match (delta, tool_calls) {
|
||||||
(Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
|
(Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
|
||||||
@ -801,7 +800,7 @@ impl ChatCompletionChunk {
|
|||||||
logprobs,
|
logprobs,
|
||||||
finish_reason,
|
finish_reason,
|
||||||
}],
|
}],
|
||||||
usage,
|
usage: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1124,7 +1124,6 @@ enum StreamState {
|
|||||||
fn create_event_from_stream_token(
|
fn create_event_from_stream_token(
|
||||||
stream_token: &StreamResponse,
|
stream_token: &StreamResponse,
|
||||||
logprobs: bool,
|
logprobs: bool,
|
||||||
stream_options: Option<StreamOptions>,
|
|
||||||
inner_using_tools: bool,
|
inner_using_tools: bool,
|
||||||
system_fingerprint: String,
|
system_fingerprint: String,
|
||||||
model_id: String,
|
model_id: String,
|
||||||
@ -1151,30 +1150,10 @@ fn create_event_from_stream_token(
|
|||||||
|
|
||||||
(content, None)
|
(content, None)
|
||||||
};
|
};
|
||||||
|
let finish_reason = stream_token
|
||||||
let (usage, finish_reason) = match &stream_token.details {
|
.details
|
||||||
Some(details) => {
|
.as_ref()
|
||||||
let usage = if stream_options
|
.map(|details| details.finish_reason.format(true));
|
||||||
.as_ref()
|
|
||||||
.map(|s| s.include_usage)
|
|
||||||
.unwrap_or(false)
|
|
||||||
{
|
|
||||||
let completion_tokens = details.generated_tokens;
|
|
||||||
let prompt_tokens = details.input_length;
|
|
||||||
let total_tokens = prompt_tokens + completion_tokens;
|
|
||||||
Some(Usage {
|
|
||||||
completion_tokens,
|
|
||||||
prompt_tokens,
|
|
||||||
total_tokens,
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
(usage, Some(details.finish_reason.format(true)))
|
|
||||||
}
|
|
||||||
None => (None, None),
|
|
||||||
};
|
|
||||||
|
|
||||||
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
|
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
|
||||||
model_id.clone(),
|
model_id.clone(),
|
||||||
system_fingerprint.clone(),
|
system_fingerprint.clone(),
|
||||||
@ -1183,7 +1162,6 @@ fn create_event_from_stream_token(
|
|||||||
current_time,
|
current_time,
|
||||||
logprobs,
|
logprobs,
|
||||||
finish_reason,
|
finish_reason,
|
||||||
usage,
|
|
||||||
));
|
));
|
||||||
|
|
||||||
event.json_data(chat_complete).unwrap_or_else(|e| {
|
event.json_data(chat_complete).unwrap_or_else(|e| {
|
||||||
@ -1287,6 +1265,17 @@ pub(crate) async fn chat_completions(
|
|||||||
match result{
|
match result{
|
||||||
Ok(stream_token) => {
|
Ok(stream_token) => {
|
||||||
let token_text = &stream_token.token.text.clone();
|
let token_text = &stream_token.token.text.clone();
|
||||||
|
let usage = stream_token.details.as_ref().map(|details| {
|
||||||
|
let completion_tokens = details.generated_tokens;
|
||||||
|
let prompt_tokens = details.input_length;
|
||||||
|
let total_tokens = prompt_tokens + completion_tokens;
|
||||||
|
|
||||||
|
Usage {
|
||||||
|
completion_tokens,
|
||||||
|
prompt_tokens,
|
||||||
|
total_tokens,
|
||||||
|
}
|
||||||
|
});
|
||||||
match state {
|
match state {
|
||||||
StreamState::Buffering => {
|
StreamState::Buffering => {
|
||||||
json_buffer.push_str(&token_text.replace(" ", ""));
|
json_buffer.push_str(&token_text.replace(" ", ""));
|
||||||
@ -1307,7 +1296,6 @@ pub(crate) async fn chat_completions(
|
|||||||
let event = create_event_from_stream_token(
|
let event = create_event_from_stream_token(
|
||||||
stream_token,
|
stream_token,
|
||||||
logprobs,
|
logprobs,
|
||||||
stream_options.clone(),
|
|
||||||
response_as_tool,
|
response_as_tool,
|
||||||
system_fingerprint.clone(),
|
system_fingerprint.clone(),
|
||||||
model_id.clone(),
|
model_id.clone(),
|
||||||
@ -1347,7 +1335,6 @@ pub(crate) async fn chat_completions(
|
|||||||
current_time,
|
current_time,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
None,
|
|
||||||
));
|
));
|
||||||
yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| {
|
yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| {
|
||||||
InferError::StreamSerializationError(e.to_string()).into()
|
InferError::StreamSerializationError(e.to_string()).into()
|
||||||
@ -1369,7 +1356,6 @@ pub(crate) async fn chat_completions(
|
|||||||
let event = create_event_from_stream_token(
|
let event = create_event_from_stream_token(
|
||||||
&stream_token,
|
&stream_token,
|
||||||
logprobs,
|
logprobs,
|
||||||
stream_options.clone(),
|
|
||||||
response_as_tool,
|
response_as_tool,
|
||||||
system_fingerprint.clone(),
|
system_fingerprint.clone(),
|
||||||
model_id.clone(),
|
model_id.clone(),
|
||||||
@ -1378,6 +1364,36 @@ pub(crate) async fn chat_completions(
|
|||||||
yield Ok::<Event, Infallible>(event);
|
yield Ok::<Event, Infallible>(event);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let should_send_usage = usage.is_some()
|
||||||
|
&& stream_options
|
||||||
|
.as_ref()
|
||||||
|
.is_some_and(|opts| opts.include_usage);
|
||||||
|
|
||||||
|
if should_send_usage {
|
||||||
|
let usage_data = usage.unwrap();
|
||||||
|
let current_time = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
|
||||||
|
.as_secs();
|
||||||
|
|
||||||
|
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
|
||||||
|
id: String::new(),
|
||||||
|
created: current_time,
|
||||||
|
model: model_id.clone(),
|
||||||
|
system_fingerprint: system_fingerprint.clone(),
|
||||||
|
choices: vec![],
|
||||||
|
usage: Some(Usage {
|
||||||
|
prompt_tokens: usage_data.prompt_tokens,
|
||||||
|
completion_tokens: usage_data.completion_tokens,
|
||||||
|
total_tokens: usage_data.total_tokens,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
yield Ok(Event::default()
|
||||||
|
.json_data(chat_complete)
|
||||||
|
.unwrap_or_else(|e| InferError::StreamSerializationError(e.to_string()).into()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(err) => yield Ok(err.into_openai_event())
|
Err(err) => yield Ok(err.into_openai_event())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user