Pr 3003 ci branch (#3007)

* change ChatCompletionChunk to align with "OpenAI Chat Completions streaming API"

Moving after tool_calls2

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

Add in buffering.

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

fix: handle usage outside of stream state and add tests

Simplifying everything quite a bit.

Remove the unused model_dump.

Clippy.

Clippy ?

Ruff.

Upgrade the flake for the latest transformers.

Upgrade after rebase.

Remove potential footgun.

Fix completion test.

* Clippy.

* Tweak for multi prompt.

* Ruff.

* Update the snapshot a bit.

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
drbh 2025-03-10 12:56:19 -04:00 committed by GitHub
parent 124398fa57
commit dc5f05f8e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 777 additions and 240 deletions

View File

@ -2,10 +2,16 @@
"nodes": { "nodes": {
"cachix": { "cachix": {
"inputs": { "inputs": {
"devenv": ["crate2nix"], "devenv": [
"flake-compat": ["crate2nix"], "crate2nix"
],
"flake-compat": [
"crate2nix"
],
"nixpkgs": "nixpkgs", "nixpkgs": "nixpkgs",
"pre-commit-hooks": ["crate2nix"] "pre-commit-hooks": [
"crate2nix"
]
}, },
"locked": { "locked": {
"lastModified": 1709700175, "lastModified": 1709700175,
@ -24,10 +30,19 @@
}, },
"cachix_2": { "cachix_2": {
"inputs": { "inputs": {
"devenv": ["crate2nix", "crate2nix_stable"], "devenv": [
"flake-compat": ["crate2nix", "crate2nix_stable"], "crate2nix",
"crate2nix_stable"
],
"flake-compat": [
"crate2nix",
"crate2nix_stable"
],
"nixpkgs": "nixpkgs_2", "nixpkgs": "nixpkgs_2",
"pre-commit-hooks": ["crate2nix", "crate2nix_stable"] "pre-commit-hooks": [
"crate2nix",
"crate2nix_stable"
]
}, },
"locked": { "locked": {
"lastModified": 1716549461, "lastModified": 1716549461,
@ -46,8 +61,16 @@
}, },
"cachix_3": { "cachix_3": {
"inputs": { "inputs": {
"devenv": ["crate2nix", "crate2nix_stable", "crate2nix_stable"], "devenv": [
"flake-compat": ["crate2nix", "crate2nix_stable", "crate2nix_stable"], "crate2nix",
"crate2nix_stable",
"crate2nix_stable"
],
"flake-compat": [
"crate2nix",
"crate2nix_stable",
"crate2nix_stable"
],
"nixpkgs": "nixpkgs_3", "nixpkgs": "nixpkgs_3",
"pre-commit-hooks": [ "pre-commit-hooks": [
"crate2nix", "crate2nix",
@ -78,15 +101,18 @@
"flake-compat": "flake-compat_3", "flake-compat": "flake-compat_3",
"flake-parts": "flake-parts_3", "flake-parts": "flake-parts_3",
"nix-test-runner": "nix-test-runner_3", "nix-test-runner": "nix-test-runner_3",
"nixpkgs": ["tgi-nix", "nixpkgs"], "nixpkgs": [
"tgi-nix",
"nixpkgs"
],
"pre-commit-hooks": "pre-commit-hooks_3" "pre-commit-hooks": "pre-commit-hooks_3"
}, },
"locked": { "locked": {
"lastModified": 1734429562, "lastModified": 1739473963,
"narHash": "sha256-V2XNs3Ir8WXNHdocfzkR/fu0FzkZ9uTDJkVecxJrGmQ=", "narHash": "sha256-ItAhpjNUzEWd/cgZVyW/jvoGbCec4TK29e1Mnmn1oJE=",
"owner": "nix-community", "owner": "nix-community",
"repo": "crate2nix", "repo": "crate2nix",
"rev": "8537c2d7cb623679aaeff62c4c4c43a91566ab09", "rev": "be31feae9a82c225c0fd1bdf978565dc452a483a",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -193,7 +219,11 @@
"devshell_2": { "devshell_2": {
"inputs": { "inputs": {
"flake-utils": "flake-utils_3", "flake-utils": "flake-utils_3",
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"] "nixpkgs": [
"crate2nix",
"crate2nix_stable",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1717408969, "lastModified": 1717408969,
@ -212,7 +242,10 @@
"devshell_3": { "devshell_3": {
"inputs": { "inputs": {
"flake-utils": "flake-utils_4", "flake-utils": "flake-utils_4",
"nixpkgs": ["crate2nix", "nixpkgs"] "nixpkgs": [
"crate2nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1711099426, "lastModified": 1711099426,
@ -310,7 +343,11 @@
}, },
"flake-parts_2": { "flake-parts_2": {
"inputs": { "inputs": {
"nixpkgs-lib": ["crate2nix", "crate2nix_stable", "nixpkgs"] "nixpkgs-lib": [
"crate2nix",
"crate2nix_stable",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1719745305, "lastModified": 1719745305,
@ -328,7 +365,10 @@
}, },
"flake-parts_3": { "flake-parts_3": {
"inputs": { "inputs": {
"nixpkgs-lib": ["crate2nix", "nixpkgs"] "nixpkgs-lib": [
"crate2nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1712014858, "lastModified": 1712014858,
@ -519,7 +559,11 @@
}, },
"gitignore_3": { "gitignore_3": {
"inputs": { "inputs": {
"nixpkgs": ["crate2nix", "pre-commit-hooks", "nixpkgs"] "nixpkgs": [
"crate2nix",
"pre-commit-hooks",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1709087332, "lastModified": 1709087332,
@ -726,10 +770,22 @@
}, },
"pre-commit-hooks_2": { "pre-commit-hooks_2": {
"inputs": { "inputs": {
"flake-compat": ["crate2nix", "crate2nix_stable", "flake-compat"], "flake-compat": [
"crate2nix",
"crate2nix_stable",
"flake-compat"
],
"gitignore": "gitignore_2", "gitignore": "gitignore_2",
"nixpkgs": ["crate2nix", "crate2nix_stable", "nixpkgs"], "nixpkgs": [
"nixpkgs-stable": ["crate2nix", "crate2nix_stable", "nixpkgs"] "crate2nix",
"crate2nix_stable",
"nixpkgs"
],
"nixpkgs-stable": [
"crate2nix",
"crate2nix_stable",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1719259945, "lastModified": 1719259945,
@ -747,11 +803,20 @@
}, },
"pre-commit-hooks_3": { "pre-commit-hooks_3": {
"inputs": { "inputs": {
"flake-compat": ["crate2nix", "flake-compat"], "flake-compat": [
"crate2nix",
"flake-compat"
],
"flake-utils": "flake-utils_5", "flake-utils": "flake-utils_5",
"gitignore": "gitignore_3", "gitignore": "gitignore_3",
"nixpkgs": ["crate2nix", "nixpkgs"], "nixpkgs": [
"nixpkgs-stable": ["crate2nix", "nixpkgs"] "crate2nix",
"nixpkgs"
],
"nixpkgs-stable": [
"crate2nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1712055707, "lastModified": 1712055707,
@ -772,21 +837,27 @@
"crate2nix": "crate2nix", "crate2nix": "crate2nix",
"flake-utils": "flake-utils_6", "flake-utils": "flake-utils_6",
"nix-filter": "nix-filter", "nix-filter": "nix-filter",
"nixpkgs": ["tgi-nix", "nixpkgs"], "nixpkgs": [
"tgi-nix",
"nixpkgs"
],
"rust-overlay": "rust-overlay", "rust-overlay": "rust-overlay",
"tgi-nix": "tgi-nix" "tgi-nix": "tgi-nix"
} }
}, },
"rust-overlay": { "rust-overlay": {
"inputs": { "inputs": {
"nixpkgs": ["tgi-nix", "nixpkgs"] "nixpkgs": [
"tgi-nix",
"nixpkgs"
]
}, },
"locked": { "locked": {
"lastModified": 1738549608, "lastModified": 1741141853,
"narHash": "sha256-GdyT9QEUSx5k/n8kILuNy83vxxdyUfJ8jL5mMpQZWfw=", "narHash": "sha256-FauVtC+FbOgkKpGVuQTNxSqrvgbmVc7hFkjn/DacwMo=",
"owner": "oxalica", "owner": "oxalica",
"repo": "rust-overlay", "repo": "rust-overlay",
"rev": "35c6f8c4352f995ecd53896200769f80a3e8f22d", "rev": "02edad1f19d6dec824e0812e4cdc0aa7930ff8ae",
"type": "github" "type": "github"
}, },
"original": { "original": {

View File

@ -8,6 +8,7 @@ from huggingface_hub.inference._generated.types.chat_completion import (
from openai.types.chat.chat_completion_chunk import ( from openai.types.chat.chat_completion_chunk import (
ChatCompletionChunk as OAIChatCompletionChunk, ChatCompletionChunk as OAIChatCompletionChunk,
) )
from openai.types.completion import Completion as OAICompletion
import requests import requests
@ -39,7 +40,6 @@ from typing import Dict, List, Optional
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
from docker.errors import NotFound from docker.errors import NotFound
from syrupy.extensions.json import JSONSnapshotExtension from syrupy.extensions.json import JSONSnapshotExtension
from text_generation import AsyncClient from text_generation import AsyncClient
from text_generation.types import ( from text_generation.types import (
BestOfSequence, BestOfSequence,
@ -133,6 +133,7 @@ class ResponseComparator(JSONSnapshotExtension):
or isinstance(data, ChatCompletionComplete) or isinstance(data, ChatCompletionComplete)
or isinstance(data, Completion) or isinstance(data, Completion)
or isinstance(data, OAIChatCompletionChunk) or isinstance(data, OAIChatCompletionChunk)
or isinstance(data, OAICompletion)
): ):
data = data.model_dump() data = data.model_dump()
elif isinstance(data, ChatCompletionStreamOutput) or isinstance( elif isinstance(data, ChatCompletionStreamOutput) or isinstance(

View File

@ -0,0 +1,62 @@
[
{
"choices": [
{
"delta": {
"content": "OK",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741265520,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "!",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741265520,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741265520,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
}
]

View File

@ -0,0 +1,75 @@
[
{
"choices": [
{
"delta": {
"content": "OK",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741266005,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "!",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741266005,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741266005,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [],
"created": 1741266005,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 3,
"prompt_tokens": 39,
"total_tokens": 42
}
}
]

View File

@ -0,0 +1,71 @@
[
{
"choices": [
{
"delta": {
"content": "OK",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741265134,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741265134,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741265134,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
}
]

View File

@ -0,0 +1,87 @@
[
{
"choices": [
{
"delta": {
"content": "OK",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741265133,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741265133,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741265133,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [],
"created": 1741265133,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 3,
"completion_tokens_details": null,
"prompt_tokens": 39,
"prompt_tokens_details": null,
"total_tokens": 42
}
}
]

View File

@ -1,17 +1,17 @@
{ {
"choices": [ "choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " A Beginners Guide\nDeep learning is a subset"
},
{ {
"finish_reason": "length", "finish_reason": "length",
"index": 1, "index": 1,
"logprobs": null, "logprobs": null,
"text": " This is a question that has puzzled many people for" "text": " This is a question that has puzzled many people for"
}, },
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " A Beginners Guide\nDeep learning is a subset"
},
{ {
"finish_reason": "length", "finish_reason": "length",
"index": 3, "index": 3,
@ -25,11 +25,11 @@
"text": " Paris\nWhat is the capital of France?\nThe" "text": " Paris\nWhat is the capital of France?\nThe"
} }
], ],
"created": 1725877154, "created": 1741264813,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": { "usage": {
"completion_tokens": 40, "completion_tokens": 40,
"prompt_tokens": 22, "prompt_tokens": 22,

View File

@ -8,11 +8,12 @@
"text": " A" "text": " A"
} }
], ],
"created": 1725883643, "created": 1741340006,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -23,11 +24,12 @@
"text": " This" "text": " This"
} }
], ],
"created": 1725883643, "created": 1741340006,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -38,11 +40,12 @@
"text": " Paris" "text": " Paris"
} }
], ],
"created": 1725883643, "created": 1741340006,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -53,11 +56,12 @@
"text": "us" "text": "us"
} }
], ],
"created": 1725883643, "created": 1741340006,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -68,11 +72,12 @@
"text": " Beginner" "text": " Beginner"
} }
], ],
"created": 1725883643, "created": 1741340006,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -83,11 +88,12 @@
"text": " is" "text": " is"
} }
], ],
"created": 1725883643, "created": 1741340006,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -98,11 +104,12 @@
"text": "\n" "text": "\n"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -113,11 +120,12 @@
"text": "cul" "text": "cul"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -128,11 +136,12 @@
"text": "s" "text": "s"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -143,11 +152,12 @@
"text": " a" "text": " a"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -158,11 +168,12 @@
"text": "What" "text": "What"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -173,11 +184,12 @@
"text": "as" "text": "as"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -188,11 +200,12 @@
"text": " Guide" "text": " Guide"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -203,11 +216,12 @@
"text": " question" "text": " question"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -218,11 +232,12 @@
"text": " is" "text": " is"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -233,11 +248,12 @@
"text": "_minus" "text": "_minus"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -248,11 +264,12 @@
"text": "\n" "text": "\n"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -263,11 +280,12 @@
"text": " that" "text": " that"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -278,11 +296,12 @@
"text": " the" "text": " the"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -293,11 +312,12 @@
"text": "cul" "text": "cul"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -308,11 +328,12 @@
"text": "Deep" "text": "Deep"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -323,11 +344,12 @@
"text": " has" "text": " has"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -338,11 +360,12 @@
"text": " capital" "text": " capital"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -353,11 +376,12 @@
"text": "as" "text": "as"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -368,11 +392,12 @@
"text": " learning" "text": " learning"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -383,11 +408,12 @@
"text": " puzzled" "text": " puzzled"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -398,11 +424,12 @@
"text": " of" "text": " of"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -413,11 +440,12 @@
"text": "(s" "text": "(s"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -428,11 +456,12 @@
"text": " is" "text": " is"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -443,11 +472,12 @@
"text": " many" "text": " many"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -458,11 +488,12 @@
"text": " France" "text": " France"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -473,11 +504,12 @@
"text": "):\n" "text": "):\n"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -488,11 +520,12 @@
"text": " a" "text": " a"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -503,11 +536,12 @@
"text": " people" "text": " people"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -518,11 +552,12 @@
"text": "?\n" "text": "?\n"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -533,11 +568,12 @@
"text": " " "text": " "
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": null
}, },
{ {
"choices": [ "choices": [
@ -548,11 +584,18 @@
"text": " subset" "text": " subset"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 6,
"prompt_tokens_details": null,
"total_tokens": 16
}
}, },
{ {
"choices": [ "choices": [
@ -563,11 +606,18 @@
"text": " for" "text": " for"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 5,
"prompt_tokens_details": null,
"total_tokens": 15
}
}, },
{ {
"choices": [ "choices": [
@ -578,11 +628,18 @@
"text": "The" "text": "The"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 8,
"prompt_tokens_details": null,
"total_tokens": 18
}
}, },
{ {
"choices": [ "choices": [
@ -593,10 +650,17 @@
"text": " \"\"\"\n" "text": " \"\"\"\n"
} }
], ],
"created": 1725883643, "created": 1741340007,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native" "system_fingerprint": "3.1.2-dev0-native",
"usage": {
"completion_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens": 3,
"prompt_tokens_details": null,
"total_tokens": 13
}
} }
] ]

View File

@ -7,11 +7,11 @@
"text": " A Beginners Guide\nDeep learning is a subset" "text": " A Beginners Guide\nDeep learning is a subset"
} }
], ],
"created": 1725876621, "created": 1741264812,
"id": "", "id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "text_completion",
"system_fingerprint": "2.2.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": { "usage": {
"completion_tokens": 10, "completion_tokens": 10,
"prompt_tokens": 6, "prompt_tokens": 6,

View File

@ -12,7 +12,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338471, "created": 1741373593,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -32,7 +32,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338471, "created": 1741373593,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -52,7 +52,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338471, "created": 1741373593,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -72,7 +72,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338471, "created": 1741373594,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -92,7 +92,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338472, "created": 1741373594,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -112,7 +112,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338472, "created": 1741373594,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -132,7 +132,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338472, "created": 1741373594,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -152,7 +152,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338472, "created": 1741373594,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -172,7 +172,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338472, "created": 1741373594,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -192,7 +192,16 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741338472, "created": 1741373594,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [],
"created": 1741373594,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",

View File

@ -0,0 +1,15 @@
import pytest
@pytest.fixture(scope="module")
def chat_handle(launcher):
    """Module-scoped fixture yielding the handle produced by ``launcher``.

    The ``launcher`` context manager is entered with the Llama 3.1 8B
    Instruct model id and stays open until the fixture is torn down.
    """
    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    with launcher(model_id) as server:
        yield server
@pytest.fixture(scope="module")
async def chat_client(chat_handle):
    """Await the handle's health check, then return ``chat_handle.client``."""
    # NOTE(review): 300 is presumably a timeout in seconds; confirm against
    # the handle's ``health`` signature.
    health_arg = 300
    await chat_handle.health(health_arg)
    ready_client = chat_handle.client
    return ready_client

View File

@ -1,11 +1,8 @@
import pytest import pytest
import requests import requests
import json from openai import OpenAI
from aiohttp import ClientSession
from huggingface_hub import InferenceClient from huggingface_hub import InferenceClient
from text_generation.types import Completion
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def flash_llama_completion_handle(launcher): def flash_llama_completion_handle(launcher):
@ -73,7 +70,6 @@ async def test_flash_llama_completion_stream_usage(
for chunk in stream: for chunk in stream:
# remove "data:" # remove "data:"
chunks.append(chunk) chunks.append(chunk)
print(f"Chunk {chunk}")
if len(chunk.choices) == 1: if len(chunk.choices) == 1:
index = chunk.choices[0].index index = chunk.choices[0].index
assert index == 0 assert index == 0
@ -158,47 +154,29 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
async def test_flash_llama_completion_many_prompts_stream( async def test_flash_llama_completion_many_prompts_stream(
flash_llama_completion, response_snapshot flash_llama_completion, response_snapshot
): ):
request = { client = OpenAI(api_key="xx", base_url=f"{flash_llama_completion.base_url}/v1")
"model": "tgi", stream = client.completions.create(
"prompt": [ model="tgi",
prompt=[
"What is Deep Learning?", "What is Deep Learning?",
"Is water wet?", "Is water wet?",
"What is the capital of France?", "What is the capital of France?",
"def mai", "def mai",
], ],
"max_tokens": 10, max_tokens=10,
"seed": 0, seed=0,
"temperature": 0.0, temperature=0.0,
"stream": True, stream=True,
} )
url = f"{flash_llama_completion.base_url}/v1/completions"
chunks = []
strings = [""] * 4 strings = [""] * 4
async with ClientSession(headers=flash_llama_completion.headers) as session: chunks = []
async with session.post(url, json=request) as response: for chunk in stream:
# iterate over the stream chunks.append(chunk)
async for chunk in response.content.iter_any(): index = chunk.choices[0].index
# remove "data:" assert 0 <= index <= 4
chunk = chunk.decode().split("\n\n") strings[index] += chunk.choices[0].text
# remove "data:" if present
chunk = [c.replace("data:", "") for c in chunk]
# remove empty strings
chunk = [c for c in chunk if c]
# remove completion marking chunk
chunk = [c for c in chunk if c != " [DONE]"]
# parse json
chunk = [json.loads(c) for c in chunk]
for c in chunk:
chunks.append(Completion(**c))
assert "choices" in c
index = c["choices"][0]["index"]
assert 0 <= index <= 4
strings[index] += c["choices"][0]["text"]
assert response.status == 200
assert list(strings) == [ assert list(strings) == [
" A Beginners Guide\nDeep learning is a subset", " A Beginners Guide\nDeep learning is a subset",
" This is a question that has puzzled many people for", " This is a question that has puzzled many people for",
@ -206,3 +184,92 @@ async def test_flash_llama_completion_many_prompts_stream(
'usculas_minusculas(s):\n """\n', 'usculas_minusculas(s):\n """\n',
] ]
assert chunks == response_snapshot assert chunks == response_snapshot
@pytest.mark.release
async def test_chat_openai_usage(flash_llama_completion, response_snapshot):
    """Stream a chat completion through the OpenAI client with usage enabled.

    Every chunk except the last must have ``usage`` unset, the last chunk
    must carry ``usage``, and the collected chunks must equal the snapshot.
    """
    endpoint = f"{flash_llama_completion.base_url}/v1"
    openai_client = OpenAI(api_key="xx", base_url=endpoint)
    request = dict(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": True},
    )

    # Drain the whole stream before checking anything.
    chunks = list(openai_client.chat.completions.create(**request))

    leading, trailing = chunks[:-1], chunks[-1:]
    assert all(piece.usage is None for piece in leading)
    assert all(piece.usage is not None for piece in trailing)
    assert chunks == response_snapshot
@pytest.mark.release
async def test_chat_openai_nousage(flash_llama_completion, response_snapshot):
    """Stream a chat completion through the OpenAI client with usage disabled.

    No chunk may carry ``usage``; the collected chunks must equal the snapshot.
    """
    endpoint = f"{flash_llama_completion.base_url}/v1"
    openai_client = OpenAI(api_key="xx", base_url=endpoint)
    request = dict(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": False},
    )

    received = []
    # Check each chunk as it arrives so a stray usage payload fails right away.
    for piece in openai_client.chat.completions.create(**request):
        assert piece.usage is None
        received.append(piece)

    assert received == response_snapshot
@pytest.mark.release
async def test_chat_hfhub_usage(flash_llama_completion, response_snapshot):
    """Stream a chat completion through ``InferenceClient`` with usage enabled.

    Every chunk except the last must have ``usage`` unset, the last chunk
    must carry ``usage``, and the collected chunks must equal the snapshot.
    """
    endpoint = f"{flash_llama_completion.base_url}/v1"
    hub_client = InferenceClient(base_url=endpoint)
    request = dict(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": True},
    )

    # Drain the whole stream before checking anything.
    chunks = list(hub_client.chat_completion(**request))

    leading, trailing = chunks[:-1], chunks[-1:]
    assert all(piece.usage is None for piece in leading)
    assert all(piece.usage is not None for piece in trailing)
    assert chunks == response_snapshot
@pytest.mark.release
async def test_chat_hfhub_nousage(flash_llama_completion, response_snapshot):
    """Stream a chat completion through ``InferenceClient`` with usage disabled.

    No chunk may carry ``usage``; the collected chunks must equal the snapshot.
    """
    endpoint = f"{flash_llama_completion.base_url}/v1"
    hub_client = InferenceClient(base_url=endpoint)
    request = dict(
        model="tgi",
        messages=[{"role": "user", "content": "Say 'OK!'"}],
        stream=True,
        max_tokens=10,
        seed=42,
        stream_options={"include_usage": False},
    )

    received = []
    # Check each chunk as it arrives so a stray usage payload fails right away.
    for piece in hub_client.chat_completion(**request):
        assert piece.usage is None
        received.append(piece)

    assert received == response_snapshot

View File

@ -764,7 +764,6 @@ impl ChatCompletionChunk {
created: u64, created: u64,
logprobs: Option<ChatCompletionLogprobs>, logprobs: Option<ChatCompletionLogprobs>,
finish_reason: Option<String>, finish_reason: Option<String>,
usage: Option<Usage>,
) -> Self { ) -> Self {
let delta = match (delta, tool_calls) { let delta = match (delta, tool_calls) {
(Some(delta), _) => ChatCompletionDelta::Chat(TextMessage { (Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
@ -801,7 +800,7 @@ impl ChatCompletionChunk {
logprobs, logprobs,
finish_reason, finish_reason,
}], }],
usage, usage: None,
} }
} }
} }

View File

@ -1124,7 +1124,6 @@ enum StreamState {
fn create_event_from_stream_token( fn create_event_from_stream_token(
stream_token: &StreamResponse, stream_token: &StreamResponse,
logprobs: bool, logprobs: bool,
stream_options: Option<StreamOptions>,
inner_using_tools: bool, inner_using_tools: bool,
system_fingerprint: String, system_fingerprint: String,
model_id: String, model_id: String,
@ -1151,30 +1150,10 @@ fn create_event_from_stream_token(
(content, None) (content, None)
}; };
let finish_reason = stream_token
let (usage, finish_reason) = match &stream_token.details { .details
Some(details) => { .as_ref()
let usage = if stream_options .map(|details| details.finish_reason.format(true));
.as_ref()
.map(|s| s.include_usage)
.unwrap_or(false)
{
let completion_tokens = details.generated_tokens;
let prompt_tokens = details.input_length;
let total_tokens = prompt_tokens + completion_tokens;
Some(Usage {
completion_tokens,
prompt_tokens,
total_tokens,
})
} else {
None
};
(usage, Some(details.finish_reason.format(true)))
}
None => (None, None),
};
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new( let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
model_id.clone(), model_id.clone(),
system_fingerprint.clone(), system_fingerprint.clone(),
@ -1183,7 +1162,6 @@ fn create_event_from_stream_token(
current_time, current_time,
logprobs, logprobs,
finish_reason, finish_reason,
usage,
)); ));
event.json_data(chat_complete).unwrap_or_else(|e| { event.json_data(chat_complete).unwrap_or_else(|e| {
@ -1287,6 +1265,17 @@ pub(crate) async fn chat_completions(
match result{ match result{
Ok(stream_token) => { Ok(stream_token) => {
let token_text = &stream_token.token.text.clone(); let token_text = &stream_token.token.text.clone();
let usage = stream_token.details.as_ref().map(|details| {
let completion_tokens = details.generated_tokens;
let prompt_tokens = details.input_length;
let total_tokens = prompt_tokens + completion_tokens;
Usage {
completion_tokens,
prompt_tokens,
total_tokens,
}
});
match state { match state {
StreamState::Buffering => { StreamState::Buffering => {
json_buffer.push_str(&token_text.replace(" ", "")); json_buffer.push_str(&token_text.replace(" ", ""));
@ -1307,7 +1296,6 @@ pub(crate) async fn chat_completions(
let event = create_event_from_stream_token( let event = create_event_from_stream_token(
stream_token, stream_token,
logprobs, logprobs,
stream_options.clone(),
response_as_tool, response_as_tool,
system_fingerprint.clone(), system_fingerprint.clone(),
model_id.clone(), model_id.clone(),
@ -1347,7 +1335,6 @@ pub(crate) async fn chat_completions(
current_time, current_time,
None, None,
None, None,
None,
)); ));
yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| { yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| {
InferError::StreamSerializationError(e.to_string()).into() InferError::StreamSerializationError(e.to_string()).into()
@ -1369,7 +1356,6 @@ pub(crate) async fn chat_completions(
let event = create_event_from_stream_token( let event = create_event_from_stream_token(
&stream_token, &stream_token,
logprobs, logprobs,
stream_options.clone(),
response_as_tool, response_as_tool,
system_fingerprint.clone(), system_fingerprint.clone(),
model_id.clone(), model_id.clone(),
@ -1378,6 +1364,36 @@ pub(crate) async fn chat_completions(
yield Ok::<Event, Infallible>(event); yield Ok::<Event, Infallible>(event);
} }
} }
let should_send_usage = usage.is_some()
&& stream_options
.as_ref()
.is_some_and(|opts| opts.include_usage);
if should_send_usage {
let usage_data = usage.unwrap();
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
id: String::new(),
created: current_time,
model: model_id.clone(),
system_fingerprint: system_fingerprint.clone(),
choices: vec![],
usage: Some(Usage {
prompt_tokens: usage_data.prompt_tokens,
completion_tokens: usage_data.completion_tokens,
total_tokens: usage_data.total_tokens,
}),
});
yield Ok(Event::default()
.json_data(chat_complete)
.unwrap_or_else(|e| InferError::StreamSerializationError(e.to_string()).into()));
}
} }
Err(err) => yield Ok(err.into_openai_event()) Err(err) => yield Ok(err.into_openai_event())
} }