Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-24 00:12:08 +00:00
Change ChatCompletionChunk to align with the OpenAI Chat Completions streaming API

Squashed commit messages:
* Moving after tool_calls (Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>)
* Add in Buffering (Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>)
* fix: handle usage outside of stream state and add tests
* Simplifying everything quite a bit
* Remove the unused model_dump
* Clippy
* Clippy?
* Ruff
* Upgrade the flake for latest transformers
* Upgrade after rebase
* Remove potential footgun
* Fix completion test
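For context, the streaming behavior this change targets can be exercised with any OpenAI-compatible client. The sketch below is illustrative only and is not part of the commit; the base_url, api_key and max_tokens values are placeholders for a locally running TGI instance:

    from openai import OpenAI

    # Placeholder endpoint/key: point these at a running TGI server.
    client = OpenAI(api_key="-", base_url="http://localhost:3000/v1")

    stream = client.chat.completions.create(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        stream_options={"include_usage": True},
    )

    last = None
    for chunk in stream:
        last = chunk
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")

    # Per the OpenAI streaming API, the final chunk has an empty `choices`
    # list and carries the aggregated token counts.
    print()
    print(last.usage)

The new integration tests in this commit assert exactly this contract: every chunk except the last reports usage == None, and the trailing chunk exposes prompt_tokens, completion_tokens and total_tokens.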
This commit is contained in:
parent 622908deab
commit 818c8db29a
127 flake.lock
@ -2,10 +2,16 @@
|
||||
"nodes": {
|
||||
"cachix": {
|
||||
"inputs": {
|
||||
"devenv": ["crate2nix"],
|
||||
"flake-compat": ["crate2nix"],
|
||||
"devenv": [
|
||||
"crate2nix"
|
||||
],
|
||||
"flake-compat": [
|
||||
"crate2nix"
|
||||
],
|
||||
"nixpkgs": "nixpkgs",
|
||||
"pre-commit-hooks": ["crate2nix"]
|
||||
"pre-commit-hooks": [
|
||||
"crate2nix"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1709700175,
|
||||
@ -24,10 +30,19 @@
|
||||
},
|
||||
"cachix_2": {
|
||||
"inputs": {
|
||||
"devenv": ["crate2nix", "crate2nix_stable"],
|
||||
"flake-compat": ["crate2nix", "crate2nix_stable"],
|
||||
"devenv": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable"
|
||||
],
|
||||
"flake-compat": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable"
|
||||
],
|
||||
"nixpkgs": "nixpkgs_2",
|
||||
"pre-commit-hooks": ["crate2nix", "crate2nix_stable"]
|
||||
"pre-commit-hooks": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1716549461,
|
||||
@ -46,8 +61,16 @@
|
||||
},
|
||||
"cachix_3": {
|
||||
"inputs": {
|
||||
"devenv": ["crate2nix", "crate2nix_stable", "crate2nix_stable"],
|
||||
"flake-compat": ["crate2nix", "crate2nix_stable", "crate2nix_stable"],
|
||||
"devenv": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable",
|
||||
"crate2nix_stable"
|
||||
],
|
||||
"flake-compat": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable",
|
||||
"crate2nix_stable"
|
||||
],
|
||||
"nixpkgs": "nixpkgs_3",
|
||||
"pre-commit-hooks": [
|
||||
"crate2nix",
|
||||
@ -78,15 +101,18 @@
|
||||
"flake-compat": "flake-compat_3",
|
||||
"flake-parts": "flake-parts_3",
|
||||
"nix-test-runner": "nix-test-runner_3",
|
||||
"nixpkgs": ["tgi-nix", "nixpkgs"],
|
||||
"nixpkgs": [
|
||||
"tgi-nix",
|
||||
"nixpkgs"
|
||||
],
|
||||
"pre-commit-hooks": "pre-commit-hooks_3"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1734429562,
|
||||
"narHash": "sha256-V2XNs3Ir8WXNHdocfzkR/fu0FzkZ9uTDJkVecxJrGmQ=",
|
||||
"lastModified": 1739473963,
|
||||
"narHash": "sha256-ItAhpjNUzEWd/cgZVyW/jvoGbCec4TK29e1Mnmn1oJE=",
|
||||
"owner": "nix-community",
|
||||
"repo": "crate2nix",
|
||||
"rev": "8537c2d7cb623679aaeff62c4c4c43a91566ab09",
|
||||
"rev": "be31feae9a82c225c0fd1bdf978565dc452a483a",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -193,7 +219,11 @@
|
||||
"devshell_2": {
|
||||
"inputs": {
|
||||
"flake-utils": "flake-utils_3",
|
||||
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
||||
"nixpkgs": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1717408969,
|
||||
@ -212,7 +242,10 @@
|
||||
"devshell_3": {
|
||||
"inputs": {
|
||||
"flake-utils": "flake-utils_4",
|
||||
"nixpkgs": ["crate2nix", "nixpkgs"]
|
||||
"nixpkgs": [
|
||||
"crate2nix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1711099426,
|
||||
@ -310,7 +343,11 @@
|
||||
},
|
||||
"flake-parts_2": {
|
||||
"inputs": {
|
||||
"nixpkgs-lib": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
||||
"nixpkgs-lib": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1719745305,
|
||||
@ -328,7 +365,10 @@
|
||||
},
|
||||
"flake-parts_3": {
|
||||
"inputs": {
|
||||
"nixpkgs-lib": ["crate2nix", "nixpkgs"]
|
||||
"nixpkgs-lib": [
|
||||
"crate2nix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1712014858,
|
||||
@ -519,7 +559,11 @@
|
||||
},
|
||||
"gitignore_3": {
|
||||
"inputs": {
|
||||
"nixpkgs": ["crate2nix", "pre-commit-hooks", "nixpkgs"]
|
||||
"nixpkgs": [
|
||||
"crate2nix",
|
||||
"pre-commit-hooks",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1709087332,
|
||||
@ -726,10 +770,22 @@
|
||||
},
|
||||
"pre-commit-hooks_2": {
|
||||
"inputs": {
|
||||
"flake-compat": ["crate2nix", "crate2nix_stable", "flake-compat"],
|
||||
"flake-compat": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable",
|
||||
"flake-compat"
|
||||
],
|
||||
"gitignore": "gitignore_2",
|
||||
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"],
|
||||
"nixpkgs-stable": ["crate2nix", "crate2nix_stable", "nixpkgs"]
|
||||
"nixpkgs": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable",
|
||||
"nixpkgs"
|
||||
],
|
||||
"nixpkgs-stable": [
|
||||
"crate2nix",
|
||||
"crate2nix_stable",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1719259945,
|
||||
@ -747,11 +803,20 @@
|
||||
},
|
||||
"pre-commit-hooks_3": {
|
||||
"inputs": {
|
||||
"flake-compat": ["crate2nix", "flake-compat"],
|
||||
"flake-compat": [
|
||||
"crate2nix",
|
||||
"flake-compat"
|
||||
],
|
||||
"flake-utils": "flake-utils_5",
|
||||
"gitignore": "gitignore_3",
|
||||
"nixpkgs": ["crate2nix", "nixpkgs"],
|
||||
"nixpkgs-stable": ["crate2nix", "nixpkgs"]
|
||||
"nixpkgs": [
|
||||
"crate2nix",
|
||||
"nixpkgs"
|
||||
],
|
||||
"nixpkgs-stable": [
|
||||
"crate2nix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1712055707,
|
||||
@ -772,21 +837,27 @@
|
||||
"crate2nix": "crate2nix",
|
||||
"flake-utils": "flake-utils_6",
|
||||
"nix-filter": "nix-filter",
|
||||
"nixpkgs": ["tgi-nix", "nixpkgs"],
|
||||
"nixpkgs": [
|
||||
"tgi-nix",
|
||||
"nixpkgs"
|
||||
],
|
||||
"rust-overlay": "rust-overlay",
|
||||
"tgi-nix": "tgi-nix"
|
||||
}
|
||||
},
|
||||
"rust-overlay": {
|
||||
"inputs": {
|
||||
"nixpkgs": ["tgi-nix", "nixpkgs"]
|
||||
"nixpkgs": [
|
||||
"tgi-nix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1738549608,
|
||||
"narHash": "sha256-GdyT9QEUSx5k/n8kILuNy83vxxdyUfJ8jL5mMpQZWfw=",
|
||||
"lastModified": 1741141853,
|
||||
"narHash": "sha256-FauVtC+FbOgkKpGVuQTNxSqrvgbmVc7hFkjn/DacwMo=",
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"rev": "35c6f8c4352f995ecd53896200769f80a3e8f22d",
|
||||
"rev": "02edad1f19d6dec824e0812e4cdc0aa7930ff8ae",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@@ -39,7 +39,13 @@ from typing import Dict, List, Optional
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
from docker.errors import NotFound
from syrupy.extensions.json import JSONSnapshotExtension

from huggingface_hub.inference._generated.types.chat_completion import (
    ChatCompletionStreamOutput,
    ChatCompletionOutput,
)
from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk as OAIChatCompletionChunk,
)
from text_generation import AsyncClient
from text_generation.types import (
    BestOfSequence,

@ -0,0 +1,62 @@
|
||||
[
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "OK",
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265520,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265520,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265520,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
}
|
||||
]
|
@ -0,0 +1,75 @@
|
||||
[
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "OK",
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741266005,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741266005,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741266005,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [],
|
||||
"created": 1741266005,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 3,
|
||||
"prompt_tokens": 39,
|
||||
"total_tokens": 42
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,71 @@
|
||||
[
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "OK",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265134,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265134,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265134,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
}
|
||||
]
|
@ -0,0 +1,87 @@
|
||||
[
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "OK",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265133,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265133,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741265133,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [],
|
||||
"created": 1741265133,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "3.1.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 3,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens": 39,
|
||||
"prompt_tokens_details": null,
|
||||
"total_tokens": 42
|
||||
}
|
||||
}
|
||||
]
|
@@ -1,17 +1,17 @@
{
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": " A Beginner’s Guide\nDeep learning is a subset"
    },
    {
      "finish_reason": "length",
      "index": 1,
      "logprobs": null,
      "text": " This is a question that has puzzled many people for"
    },
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": " A Beginner’s Guide\nDeep learning is a subset"
    },
    {
      "finish_reason": "length",
      "index": 3,
@@ -25,11 +25,11 @@
      "text": " Paris\nWhat is the capital of France?\nThe"
    }
  ],
  "created": 1725877154,
  "created": 1741264813,
  "id": "",
  "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "object": "text_completion",
  "system_fingerprint": "2.2.1-dev0-native",
  "system_fingerprint": "3.1.2-dev0-native",
  "usage": {
    "completion_tokens": 40,
    "prompt_tokens": 22,
@ -1,602 +0,0 @@
|
||||
[
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " A"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " This"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " Paris"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "us"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Beginner"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " is"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "cul"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "’s"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " a"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "What"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "as"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Guide"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " question"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " is"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "_minus"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " that"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " the"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "cul"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "Deep"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " has"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " capital"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "as"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " puzzled"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " of"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "(s"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " is"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " many"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": " France"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": "):\n"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " a"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " people"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "?\n"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " subset"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 1,
|
||||
"logprobs": null,
|
||||
"text": " for"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 2,
|
||||
"logprobs": null,
|
||||
"text": "The"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 3,
|
||||
"logprobs": null,
|
||||
"text": " \"\"\"\n"
|
||||
}
|
||||
],
|
||||
"created": 1725883643,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "2.2.1-dev0-native"
|
||||
}
|
||||
]
|
@@ -7,11 +7,11 @@
      "text": " A Beginner’s Guide\nDeep learning is a subset"
    }
  ],
  "created": 1725876621,
  "created": 1741264812,
  "id": "",
  "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "object": "text_completion",
  "system_fingerprint": "2.2.1-dev0-native",
  "system_fingerprint": "3.1.2-dev0-native",
  "usage": {
    "completion_tokens": 10,
    "prompt_tokens": 6,

16 integration-tests/models/test_chat_stream_options.py (Normal file)
@@ -0,0 +1,16 @@
import pytest


@pytest.fixture(scope="module")
def chat_handle(launcher):
    with launcher(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
    ) as handle:
        yield handle


@pytest.fixture(scope="module")
async def chat_client(chat_handle):
    await chat_handle.health(300)
    return chat_handle.client

@@ -2,8 +2,8 @@ import pytest
import requests
import json
from aiohttp import ClientSession
from openai import OpenAI
from huggingface_hub import InferenceClient

from text_generation.types import Completion


@@ -158,47 +158,30 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
async def test_flash_llama_completion_many_prompts_stream(
    flash_llama_completion, response_snapshot
):
    request = {
        "model": "tgi",
        "prompt": [
    client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1")
    stream = client.completion(
        model="tgi",
        prompt=[
            "What is Deep Learning?",
            "Is water wet?",
            "What is the capital of France?",
            "def mai",
        ],
        "max_tokens": 10,
        "seed": 0,
        "temperature": 0.0,
        "stream": True,
    }
        max_tokens=10,
        seed=0,
        temperature=0.0,
        stream=True,
    )

    url = f"{flash_llama_completion.base_url}/v1/completions"

    chunks = []
    strings = [""] * 4
    async with ClientSession(headers=flash_llama_completion.headers) as session:
        async with session.post(url, json=request) as response:
            # iterate over the stream
            async for chunk in response.content.iter_any():
                # remove "data:"
                chunk = chunk.decode().split("\n\n")
                # remove "data:" if present
                chunk = [c.replace("data:", "") for c in chunk]
                # remove empty strings
                chunk = [c for c in chunk if c]
                # remove completion marking chunk
                chunk = [c for c in chunk if c != " [DONE]"]
                # parse json
                chunk = [json.loads(c) for c in chunk]
    chunks = []
    for chunk in stream:
        chunks.append(chunk)
        assert "choices" in chunk
        index = chunk.choices[0].index
        assert 0 <= index <= 4
        strings[index] += chunk.choices[0].text

                for c in chunk:
                    chunks.append(Completion(**c))
                    assert "choices" in c
                    index = c["choices"][0]["index"]
                    assert 0 <= index <= 4
                    strings[index] += c["choices"][0]["text"]

    assert response.status == 200
    assert list(strings) == [
        " A Beginner’s Guide\nDeep learning is a subset",
        " This is a question that has puzzled many people for",
@@ -206,3 +189,92 @@ async def test_flash_llama_completion_many_prompts_stream(
        'usculas_minusculas(s):\n """\n',
    ]
    assert chunks == response_snapshot


@pytest.mark.release
async def test_chat_openai_usage(flash_llama_completion, response_snapshot):
    client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")

    stream = client.chat.completions.create(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": True},
    )

    chunks = []
    for chunk in stream:
        chunks.append(chunk)
    for chunk in chunks[:-1]:
        assert chunk.usage is None
    for chunk in chunks[-1:]:
        assert chunk.usage is not None

    assert chunks == response_snapshot


@pytest.mark.release
async def test_chat_openai_nousage(flash_llama_completion, response_snapshot):
    client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")

    stream = client.chat.completions.create(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": False},
    )

    chunks = []
    for chunk in stream:
        assert chunk.usage is None
        chunks.append(chunk)

    assert chunks == response_snapshot


@pytest.mark.release
async def test_chat_hfhub_usage(flash_llama_completion, response_snapshot):
    client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1")
    stream = client.chat_completion(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": True},
    )

    chunks = []
    for chunk in stream:
        chunks.append(chunk)

    for chunk in chunks[:-1]:
        assert chunk.usage is None
    for chunk in chunks[-1:]:
        assert chunk.usage is not None

    assert chunks == response_snapshot


@pytest.mark.release
async def test_chat_hfhub_nousage(flash_llama_completion, response_snapshot):
    client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1")
    stream = client.chat_completion(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": False},
    )

    chunks = []
    for chunk in stream:
        assert chunk.usage is None
        chunks.append(chunk)

    assert chunks == response_snapshot

@@ -764,7 +764,6 @@ impl ChatCompletionChunk {
        created: u64,
        logprobs: Option<ChatCompletionLogprobs>,
        finish_reason: Option<String>,
        usage: Option<Usage>,
    ) -> Self {
        let delta = match (delta, tool_calls) {
            (Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
@@ -801,7 +800,7 @@ impl ChatCompletionChunk {
                logprobs,
                finish_reason,
            }],
            usage,
            usage: None,
        }
    }
}
@@ -1124,7 +1124,6 @@ enum StreamState {
fn create_event_from_stream_token(
    stream_token: &StreamResponse,
    logprobs: bool,
    stream_options: Option<StreamOptions>,
    inner_using_tools: bool,
    system_fingerprint: String,
    model_id: String,
@@ -1151,30 +1150,10 @@ fn create_event_from_stream_token(

        (content, None)
    };

    let (usage, finish_reason) = match &stream_token.details {
        Some(details) => {
            let usage = if stream_options
                .as_ref()
                .map(|s| s.include_usage)
                .unwrap_or(false)
            {
                let completion_tokens = details.generated_tokens;
                let prompt_tokens = details.input_length;
                let total_tokens = prompt_tokens + completion_tokens;
                Some(Usage {
                    completion_tokens,
                    prompt_tokens,
                    total_tokens,
                })
            } else {
                None
            };
            (usage, Some(details.finish_reason.format(true)))
        }
        None => (None, None),
    };

    let finish_reason = stream_token
        .details
        .as_ref()
        .map(|details| details.finish_reason.format(true));
    let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
        model_id.clone(),
        system_fingerprint.clone(),
@@ -1183,7 +1162,6 @@ fn create_event_from_stream_token(
        current_time,
        logprobs,
        finish_reason,
        usage,
    ));

    event.json_data(chat_complete).unwrap_or_else(|e| {
@@ -1287,6 +1265,17 @@ pub(crate) async fn chat_completions(
            match result{
                Ok(stream_token) => {
                    let token_text = &stream_token.token.text.clone();
                    let usage = stream_token.details.as_ref().map(|details| {
                        let completion_tokens = details.generated_tokens;
                        let prompt_tokens = details.input_length;
                        let total_tokens = prompt_tokens + completion_tokens;

                        Usage {
                            completion_tokens,
                            prompt_tokens,
                            total_tokens,
                        }
                    });
                    match state {
                        StreamState::Buffering => {
                            json_buffer.push_str(&token_text.replace(" ", ""));
@@ -1307,7 +1296,6 @@ pub(crate) async fn chat_completions(
                            let event = create_event_from_stream_token(
                                stream_token,
                                logprobs,
                                stream_options.clone(),
                                response_as_tool,
                                system_fingerprint.clone(),
                                model_id.clone(),
@@ -1347,7 +1335,6 @@ pub(crate) async fn chat_completions(
                                current_time,
                                None,
                                None,
                                None,
                            ));
                            yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| {
                                InferError::StreamSerializationError(e.to_string()).into()
@@ -1369,7 +1356,6 @@ pub(crate) async fn chat_completions(
                            let event = create_event_from_stream_token(
                                &stream_token,
                                logprobs,
                                stream_options.clone(),
                                response_as_tool,
                                system_fingerprint.clone(),
                                model_id.clone(),
@@ -1378,6 +1364,36 @@ pub(crate) async fn chat_completions(
                            yield Ok::<Event, Infallible>(event);
                        }
                    }

                    let should_send_usage = usage.is_some()
                        && stream_options
                            .as_ref()
                            .is_some_and(|opts| opts.include_usage);

                    if should_send_usage {
                        let usage_data = usage.unwrap();
                        let current_time = std::time::SystemTime::now()
                            .duration_since(std::time::UNIX_EPOCH)
                            .unwrap_or_else(|_| std::time::Duration::from_secs(0))
                            .as_secs();

                        let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
                            id: String::new(),
                            created: current_time,
                            model: model_id.clone(),
                            system_fingerprint: system_fingerprint.clone(),
                            choices: vec![],
                            usage: Some(Usage {
                                prompt_tokens: usage_data.prompt_tokens,
                                completion_tokens: usage_data.completion_tokens,
                                total_tokens: usage_data.total_tokens,
                            }),
                        });

                        yield Ok(Event::default()
                            .json_data(chat_complete)
                            .unwrap_or_else(|e| InferError::StreamSerializationError(e.to_string()).into()));
                    }
                }
                Err(err) => yield Ok(err.into_openai_event())
            }