diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 4477f647..4607061c 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -269,17 +269,17 @@ class ResponseComparator(JSONSnapshotExtension):
         def eq_chat_complete_chunk(
             response: ChatCompletionChunk, other: ChatCompletionChunk
         ) -> bool:
-            if response.choices[0].delta.content:
+            if response.choices[0].delta.content is not None:
                 return (
                     response.choices[0].delta.content == other.choices[0].delta.content
                 )
-            elif response.choices[0].delta.tool_calls:
+            elif response.choices[0].delta.tool_calls is not None:
                 return (
                     response.choices[0].delta.tool_calls
                     == other.choices[0].delta.tool_calls
                 )
             else:
-                raise RuntimeError("Invalid empty chat chunk")
+                raise RuntimeError(f"Invalid empty chat chunk {response} vs {other}")
 
         def eq_response(response: Response, other: Response) -> bool:
             return response.generated_text == other.generated_text and eq_details(
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
index 33e223ba..7445099f 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
@@ -26,11 +26,11 @@
       "usage": null
     }
   ],
-  "created": 1732293383,
+  "created": 1741195536,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "2.4.1-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": {
     "completion_tokens": 30,
     "prompt_tokens": 615,
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
index 92ffbbc1..99018f96 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
@@ -26,11 +26,11 @@
       "usage": null
     }
   ],
-  "created": 1732293384,
+  "created": 1741195538,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "2.4.1-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": {
     "completion_tokens": 30,
     "prompt_tokens": 615,
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
index 603c90af..a80a6a23 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
@@ -1,7 +1,7 @@
 {
   "choices": [
     {
-      "finish_reason": "eos_token",
+      "finish_reason": "stop",
       "index": 0,
       "logprobs": null,
       "message": {
@@ -13,12 +13,12 @@
             "function": {
               "arguments": {
                 "format": "celsius",
-                "location": "New York, NY"
+                "location": "Brooklyn, New York"
               },
               "description": null,
               "name": "get_current_weather"
             },
-            "id": 0,
+            "id": "0",
             "type": "function"
           }
         ]
@@ -26,14 +26,14 @@
       "usage": null
     }
   ],
-  "created": 1712852394,
+  "created": 1741195540,
   "id": "",
-  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-  "object": "text_completion",
-  "system_fingerprint": "2.0.1-native",
+  "model": "meta-llama/Llama-3.1-8B-Instruct",
+  "object": "chat.completion",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": {
-    "completion_tokens": 48,
-    "prompt_tokens": 320,
-    "total_tokens": 368
+    "completion_tokens": 30,
+    "prompt_tokens": 326,
+    "total_tokens": 356
   }
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
index 3ed893fa..9cfea791 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
@@ -13,14 +13,14 @@
       "usage": null
     }
   ],
-  "created": 1728497062,
+  "created": 1741195542,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "2.4.2-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": {
-    "completion_tokens": 23,
-    "prompt_tokens": 604,
-    "total_tokens": 627
+    "completion_tokens": 22,
+    "prompt_tokens": 608,
+    "total_tokens": 630
   }
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json
index b134004a..34615f8e 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json
@@ -11,10 +11,10 @@
       "logprobs": null
     }
   ],
-  "created": 1728497531,
+  "created": 1741195542,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.4.2-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": null
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json
index b91610a3..e6d78924 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json
@@ -24,7 +24,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -57,7 +57,7 @@
         "logprobs": null
      }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -90,7 +90,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -123,7 +123,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -156,7 +156,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -189,7 +189,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -222,7 +222,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -255,7 +255,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -288,7 +288,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -321,7 +321,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -354,7 +354,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -387,7 +387,7 @@
         "logprobs": null
      }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -420,7 +420,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -453,7 +453,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183959,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -486,7 +486,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -519,7 +519,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -552,7 +552,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -585,7 +585,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -618,7 +618,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -651,7 +651,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -684,7 +684,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -717,7 +717,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -750,7 +750,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -783,7 +783,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195536,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -816,7 +816,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195537,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -849,7 +849,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195537,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -882,7 +882,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195537,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -915,7 +915,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195537,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -948,7 +948,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195537,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
@@ -981,7 +981,7 @@
         "logprobs": null
       }
     ],
-    "created": 1741183960,
+    "created": 1741195537,
     "id": "",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "object": "chat.completion.chunk",
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream.json
index 1362b472..11644190 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream.json
@@ -11,10 +11,10 @@
       "logprobs": null
     }
   ],
-  "created": 1728497461,
+  "created": 1741195545,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.4.2-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": null
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json
index bb8d61c8..713e7a56 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json
@@ -3,25 +3,27 @@
     {
       "delta": {
         "role": "assistant",
-        "tool_calls": {
-          "function": {
-            "arguments": "<|eot_id|>",
-            "name": null
-          },
-          "id": "",
-          "index": 0,
-          "type": "function"
-        }
+        "tool_calls": [
+          {
+            "function": {
+              "arguments": "<|eot_id|>",
+              "name": null
+            },
+            "id": "",
+            "index": 0,
+            "type": "function"
+          }
+        ]
       },
       "finish_reason": "stop",
       "index": 0,
       "logprobs": null
     }
   ],
-  "created": 1732293254,
+  "created": 1741195554,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.4.1-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": null
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json
index 2ccab4a9..bde28149 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json
@@ -11,10 +11,10 @@
       "logprobs": null
     }
   ],
-  "created": 1729262528,
+  "created": 1741195551,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.3.2-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": null
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json
index dbced5b8..7896607a 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json
@@ -4,25 +4,27 @@
       "delta": {
         "content": null,
         "role": "assistant",
-        "tool_calls": {
-          "function": {
-            "arguments": "<|eot_id|>",
-            "name": null
-          },
-          "id": "",
-          "index": 0,
-          "type": "function"
-        }
+        "tool_calls": [
+          {
+            "function": {
+              "arguments": "<|eot_id|>",
+              "name": null
+            },
+            "id": "",
+            "index": 0,
+            "type": "function"
+          }
+        ]
       },
       "finish_reason": "stop",
       "index": 0,
       "logprobs": null
     }
   ],
-  "created": 1732293246,
+  "created": 1741195548,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.4.1-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": null
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
index 27d2f9ca..92d27f61 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
@@ -4,25 +4,27 @@
       "delta": {
         "content": null,
         "role": "assistant",
-        "tool_calls": {
-          "function": {
-            "arguments": "<|eot_id|>",
-            "name": null
-          },
-          "id": "",
-          "index": 0,
-          "type": "function"
-        }
+        "tool_calls": [
+          {
+            "function": {
+              "arguments": "<|eot_id|>",
+              "name": null
+            },
+            "id": "",
+            "index": 0,
+            "type": "function"
+          }
+        ]
       },
       "finish_reason": "stop",
       "index": 0,
       "logprobs": null
     }
   ],
-  "created": 1732293235,
+  "created": 1741195541,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.4.1-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": null
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json
index 4f10aa3b..33a3bb43 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json
@@ -13,11 +13,11 @@
       "usage": null
     }
   ],
-  "created": 1739932427,
+  "created": 1741195556,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "3.1.1-dev0-native",
+  "system_fingerprint": "3.1.2-dev0-native",
   "usage": {
     "completion_tokens": 79,
     "prompt_tokens": 103,
diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py
index 1325669f..7fd6cadd 100644
--- a/integration-tests/models/test_tools_llama.py
+++ b/integration-tests/models/test_tools_llama.py
@@ -246,7 +246,9 @@ async def test_flash_llama_grammar_tools_stream(
     last_response = None
     async for response in responses:
         count += 1
-        tool_calls_generated += response.choices[0].delta.tool_calls.function.arguments
+        tool_calls_generated += (
+            response.choices[0].delta.tool_calls[0].function.arguments
+        )
         last_response = response
         assert response.choices[0].delta.content is None
 
@@ -393,7 +395,9 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
     async for response in responses:
         count += 1
         assert response.choices[0].delta.content is None
-        tool_calls_generated += response.choices[0].delta.tool_calls.function.arguments
+        tool_calls_generated += (
+            response.choices[0].delta.tool_calls[0].function.arguments
+        )
         last_response = response
 
     assert count == 29
@@ -491,8 +495,8 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
             break
         response = json.loads(line)
         tool_calls_generated += response["choices"][0]["delta"]["tool_calls"][
-            "function"
-        ]["arguments"]
+            0
+        ]["function"]["arguments"]
         last_response = response
 
     assert count == 39
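
The test changes above follow from `delta.tool_calls` now being a list of tool-call deltas, and from the comparator checking `content`/`tool_calls` against `None` rather than truthiness. A minimal sketch of that consumption pattern, for reference only; the `stream` object and chunk shapes are assumptions for illustration, not part of the patch:

    # Sketch: accumulate streamed tool-call arguments the way the updated tests do.
    async def collect_tool_call_arguments(stream) -> str:
        arguments = ""
        async for chunk in stream:
            delta = chunk.choices[0].delta
            if delta.content is not None:
                continue  # plain-content chunk, nothing to accumulate
            elif delta.tool_calls is not None:
                # delta.tool_calls is a list; argument fragments arrive on its first entry
                arguments += delta.tool_calls[0].function.arguments
            else:
                raise RuntimeError(f"Invalid empty chat chunk {chunk}")
        return arguments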