diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json index 06cf038a..d9742497 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json @@ -10,7 +10,7 @@ "tool_calls": [ { "function": { - "arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", + "arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}", "description": null, "name": "get_current_weather" }, @@ -21,7 +21,7 @@ } } ], - "created": 1741263682, + "created": 1741372434, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json index 0152ea70..1c3a5db3 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json @@ -10,7 +10,7 @@ "tool_calls": [ { "function": { - "arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", + "arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}", "description": null, "name": "get_current_weather" }, @@ -21,7 +21,7 @@ } } ], - "created": 1741263684, + "created": 1741372657, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json index 8dab9a5b..3342dbff 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json @@ -8,8 +8,8 @@ "tool_calls": [ { "function": { - "arguments": "{\"", - "name": null + "arguments": "{", + "name": "get_current_weather" }, "id": "", "index": 0, @@ -22,187 +22,7 @@ "logprobs": null } ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "function", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " {\"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "name", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -232,157 +52,7 @@ "logprobs": null } ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "get", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_current", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_weather", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\",", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -412,7 +82,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -442,7 +112,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -472,7 +142,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -488,7 +158,7 @@ "tool_calls": [ { "function": { - "arguments": "Paris", + "arguments": "Bro", "name": null }, "id": "", @@ -502,7 +172,37 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "oklyn", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -532,7 +232,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -548,7 +248,7 @@ "tool_calls": [ { "function": { - "arguments": " France", + "arguments": " NY", "name": null }, "id": "", @@ -562,7 +262,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -592,7 +292,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -622,7 +322,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -652,7 +352,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -682,7 +382,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371719, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -712,7 +412,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371720, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -728,7 +428,7 @@ "tool_calls": [ { "function": { - "arguments": "c", + "arguments": "f", "name": null }, "id": "", @@ -742,7 +442,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371720, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -758,7 +458,7 @@ "tool_calls": [ { "function": { - "arguments": "elsius", + "arguments": "ahrenheit", "name": null }, "id": "", @@ -772,7 +472,7 @@ "logprobs": null } ], - "created": 1741263685, + "created": 1741371720, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -788,7 +488,7 @@ "tool_calls": [ { "function": { - "arguments": "\"}}", + "arguments": "\"}", "name": null }, "id": "", @@ -802,37 +502,7 @@ "logprobs": null } ], - "created": 1741263685, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "<|eot_id|>", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": "stop", - "index": 0, - "logprobs": null - } - ], - "created": 1741263685, + "created": 1741371720, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json index b1d4fb87..dc969cee 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json @@ -1,4 +1,24 @@ [ + { + "choices": [ + { + "delta": { + "content": "", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741364571, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, { "choices": [ { @@ -12,7 +32,7 @@ "logprobs": null } ], - "created": 1741263687, + "created": 1741364571, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -32,7 +52,7 @@ "logprobs": null } ], - "created": 1741263687, + "created": 1741364571, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -52,7 +72,7 @@ "logprobs": null } ], - "created": 1741263687, + "created": 1741364571, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -72,7 +92,7 @@ "logprobs": null } ], - "created": 1741263687, + "created": 1741364571, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -92,7 +112,27 @@ "logprobs": null } ], - "created": 1741263687, + "created": 1741364571, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "!", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741364571, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json index 3b22d83e..436c2431 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json @@ -10,7 +10,7 @@ "tool_calls": [ { "function": { - "arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", + "arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}", "description": null, "name": "get_current_weather" }, @@ -21,7 +21,7 @@ } } ], - "created": 1741263680, + "created": 1741372335, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json index c8fc50a2..6020cdca 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json @@ -10,8 +10,8 @@ "tool_calls": [ { "function": { - "arguments": "{\"", - "name": null + "arguments": "{", + "name": "get_current_weather" }, "id": "", "index": 0, @@ -24,205 +24,7 @@ "logprobs": null } ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "function", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " {\"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "name", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -255,172 +57,7 @@ "logprobs": null } ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "get", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_current", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_weather", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\",", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -453,7 +90,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -486,7 +123,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -519,7 +156,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -552,7 +189,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -585,7 +222,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -618,7 +255,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -651,7 +288,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -684,7 +321,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -717,7 +354,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370464, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -750,7 +387,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370465, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -783,7 +420,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370465, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -816,7 +453,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370465, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -849,7 +486,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370465, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -882,7 +519,7 @@ "logprobs": null } ], - "created": 1741263681, + "created": 1741370465, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -901,7 +538,7 @@ "tool_calls": [ { "function": { - "arguments": "\"}}", + "arguments": "\"}", "name": null }, "id": "", @@ -915,40 +552,7 @@ "logprobs": null } ], - "created": 1741263681, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "<|eot_id|>", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": "stop", - "index": 0, - "logprobs": null - } - ], - "created": 1741263681, + "created": 1741370465, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json index 4b0f5a07..cdac9bc4 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json @@ -1,4 +1,24 @@ [ + { + "choices": [ + { + "delta": { + "content": "", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741371722, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, { "choices": [ { @@ -12,7 +32,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -32,7 +52,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -52,7 +72,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -72,7 +92,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -92,7 +112,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -112,7 +132,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -132,7 +152,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -152,7 +172,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -172,7 +192,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -192,7 +212,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -212,7 +232,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -232,7 +252,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -252,7 +272,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -272,7 +292,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -292,7 +312,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -312,7 +332,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -332,7 +352,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371722, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -352,7 +372,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -372,7 +392,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -392,7 +412,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -412,7 +432,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -432,7 +452,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -452,7 +472,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -472,7 +492,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -492,7 +512,7 @@ "logprobs": null } ], - "created": 1741263688, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -512,7 +532,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -532,7 +552,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -552,7 +572,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -572,7 +592,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -592,7 +612,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -612,7 +632,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -632,7 +652,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -652,7 +672,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -672,7 +692,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -692,7 +712,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -712,7 +732,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -732,7 +752,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -752,7 +772,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -772,7 +792,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -792,7 +812,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -812,7 +832,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -832,7 +852,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -852,7 +872,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -872,7 +892,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -892,7 +912,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -912,7 +932,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -932,7 +952,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371723, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -952,7 +972,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -972,7 +992,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -992,7 +1012,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1012,7 +1032,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1032,7 +1052,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1052,7 +1072,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1072,7 +1092,7 @@ "logprobs": null } ], - "created": 1741263689, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1092,7 +1112,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1112,7 +1132,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1132,7 +1152,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1152,7 +1172,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1172,7 +1192,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1192,7 +1212,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1212,7 +1232,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1232,7 +1252,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1252,7 +1272,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1272,7 +1292,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1292,7 +1312,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1312,7 +1332,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1332,7 +1352,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1352,7 +1372,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1372,7 +1392,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1392,7 +1412,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1412,7 +1432,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1432,7 +1452,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1452,7 +1472,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1472,7 +1492,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1492,7 +1512,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1512,7 +1532,7 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1532,7 +1552,27 @@ "logprobs": null } ], - "created": 1741263690, + "created": 1741371724, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ".", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741371725, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json index b253d465..fe51488c 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json @@ -1,1232 +1 @@ -[ - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "{\"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "function", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " {\"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "n", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "am", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "e", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "get", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_n", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_day", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_weather", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_fore", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "cast", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\",", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "location", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263698, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "San", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " Francisco", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": ",", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " CA", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\",", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "format", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "c", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "elsius", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\",", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " \"", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "num", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "_days", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "\":", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " ", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "3", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "}}", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "<|eot_id|>", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": "stop", - "index": 0, - "logprobs": null - } - ], - "created": 1741263699, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - } -] +[] diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py index ebf69cb7..bb4b308b 100644 --- a/integration-tests/models/test_tools_llama.py +++ b/integration-tests/models/test_tools_llama.py @@ -108,7 +108,7 @@ async def test_flash_llama_grammar_tools_nostream( function=ChatCompletionOutputFunctionDefinition( description=None, name="get_current_weather", - arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', + arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}', ), ) ] @@ -142,14 +142,15 @@ async def test_flash_llama_grammar_tools_openai( chunks = [] tool = "" + name = "" for chunk in stream: + if chunk.choices[0].delta.tool_calls[0].function.name: + name += chunk.choices[0].delta.tool_calls[0].function.name tool += chunk.choices[0].delta.tool_calls[0].function.arguments chunks.append(chunk) - assert ( - tool - == '{"function": {"_name": "get_current_weather", "location": "Brooklyn, NY", "format": "fahrenheit"}}<|eot_id|>' - ) + assert name == "get_current_weather" + assert tool == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}' assert chunks == response_snapshot @@ -184,7 +185,7 @@ async def test_flash_llama_grammar_tools_auto_nostream( function=ChatCompletionOutputFunctionDefinition( description=None, name="get_current_weather", - arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', + arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}', ), ) ] @@ -223,7 +224,7 @@ async def test_flash_llama_grammar_tools_choice_nostream( function=ChatCompletionOutputFunctionDefinition( description=None, name="get_current_weather", - arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', + arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}', ), ) ] @@ -250,23 +251,24 @@ async def test_flash_llama_grammar_tools_choice_stream( }, { "role": "user", - "content": "What is the weather like in Paris, France?", + "content": "What is the weather like in Brooklyn, New York?", }, ], stream=True, ) - tool_calls_generated = "" + arguments = "" chunks = [] + name = "" for chunk in stream: - tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments + if chunk.choices[0].delta.tool_calls[0].function.name: + name += chunk.choices[0].delta.tool_calls[0].function.name + arguments += chunk.choices[0].delta.tool_calls[0].function.arguments assert chunk.choices[0].delta.content is None chunks.append(chunk) - assert ( - tool_calls_generated - == '{"function": {"_name": "get_current_weather", "location": "Paris, France", "format": "celsius"}}<|eot_id|>' - ) + assert name == "get_current_weather" + assert arguments == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}' assert chunks == response_snapshot @@ -297,8 +299,6 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream( content_generated = response.choices[0].message.content assert response.choices[0].message.tool_calls is None - ######## FIXME before MERGE ############################ - # TODO This is different from the streaming case, this is NOT normal. assert content_generated == "I am a helpful assistant!" assert response == response_snapshot @@ -334,7 +334,8 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream( chunks.append(chunk) assert chunk.choices[0].delta.tool_calls is None - assert content_generated == "I am a helpful assistant" + ######## This is exactly the same as the non streaming case + assert content_generated == "I am a helpful assistant!" assert chunks == response_snapshot @@ -371,7 +372,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto( assert ( content_generated - == "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle" + == "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle." ) assert chunks == response_snapshot @@ -401,14 +402,18 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required( ) tool_calls_generated = "" + name = "" chunks = [] for chunk in stream: assert chunk.choices[0].delta.content is None + if chunk.choices[0].delta.tool_calls[0].function.name: + name += chunk.choices[0].delta.tool_calls[0].function.name tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments + assert name == "get_n_day_weather_forecast" assert ( tool_calls_generated - == '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}}<|eot_id|>' + == '{ "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}' ) assert chunks == response_snapshot @@ -479,12 +484,17 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object( ) chunks = [] tool_calls_generated = "" + name = "" for chunk in stream: + assert chunk.choices[0].delta.content is None + if chunk.choices[0].delta.tool_calls[0].function.name: + name += chunk.choices[0].delta.tool_calls[0].function.name tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments - chunks.append(chunk) + + assert name == "get_n_day_weather_forecast" assert ( tool_calls_generated - == '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days": 3}}<|eot_id|>' + == '{ "location": "San Francisco, CA", "format": "celsius", "num_days": 3}' ) assert chunks == response_snapshot diff --git a/router/src/chat.rs b/router/src/chat.rs new file mode 100644 index 00000000..ac132e20 --- /dev/null +++ b/router/src/chat.rs @@ -0,0 +1,725 @@ +use crate::{ + infer::InferError, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionDelta, + ChatCompletionLogprobs, CompletionType, DeltaToolCall, Function, FunctionDefinition, + StreamOptions, StreamResponse, TextMessage, ToolCallDelta, Usage, +}; +use serde::Deserialize; +use serde_json::Value; + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +enum _NoTool { + NoTool, +} + +#[derive(Debug, Deserialize)] +struct NoToolCall { + _name: _NoTool, + content: String, +} +#[derive(Debug, Deserialize)] +struct NoTool { + function: NoToolCall, +} + +#[derive(Debug, Deserialize)] +struct ToolCall { + _name: String, + #[serde(flatten, default)] + /// Using Map to preserve order + arguments: serde_json::Map, +} +#[derive(Debug, Deserialize)] +struct Call { + function: ToolCall, +} + +pub(crate) fn parse_output( + generated_text: &str, +) -> Result<(Option>, Option), InferError> { + let call: Call = serde_json::from_str(generated_text).map_err(|e| { + InferError::ToolError(format!( + "Failed to parse generated text: {} {:?}", + e, generated_text + )) + })?; + let name = call.function._name; + + match &name[..] { + "no_tool" => { + // parse the content message + let content_message = call + .function + .arguments + .get("content") + .and_then(Value::as_str) + .ok_or_else(|| { + InferError::ToolError("No `content` found in generated text".to_string()) + })? + .to_string(); + Ok((None, Some(content_message))) + } + name => { + let tool_calls = vec![crate::ToolCall { + id: "0".to_string(), + r#type: "function".to_string(), + function: FunctionDefinition { + description: None, + name: name.to_string(), + arguments: serde_json::to_value(call.function.arguments).map_err(|err| { + InferError::ToolError(format!( + "Could not convert arguments to JSON map {err}" + )) + })?, + }, + }]; + Ok((Some(tool_calls), None)) + } + } +} + +/// Convert a StreamResponse into an Event to be sent over SSE +fn create_event_from_stream_token( + stream_token: &StreamResponse, + logprobs: bool, + inner_using_tools: bool, + system_fingerprint: String, + model_id: String, + function_name: Option, +) -> CompletionType { + let current_time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_else(|_| std::time::Duration::from_secs(0)) + .as_secs(); + + let logprobs = logprobs.then(|| { + ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens.clone())) + }); + + // replace the content with the tool calls if grammar is present + let content = if !stream_token.token.special { + Some(stream_token.token.text.clone()) + } else { + None + }; + let (content, tool_calls) = if inner_using_tools { + // Cast into a vec + (None, content) + } else { + (content, None) + }; + let finish_reason = stream_token + .details + .as_ref() + .map(|details| details.finish_reason.format(true)); + let delta = match (content, tool_calls) { + (Some(delta), _) => ChatCompletionDelta::Chat(TextMessage { + role: "assistant".to_string(), + content: delta, + ..Default::default() + }), + (None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta { + role: "assistant".to_string(), + tool_calls: vec![DeltaToolCall { + index: 0, + id: String::new(), + r#type: "function".to_string(), + function: Function { + name: function_name, + arguments: tool_calls, + }, + }], + }), + (None, None) => ChatCompletionDelta::Chat(TextMessage { + role: "assistant".to_string(), + content: "".to_string(), + ..Default::default() + }), + }; + let choices = vec![ChatCompletionChoice { + index: 0, + delta, + logprobs, + finish_reason, + }]; + CompletionType::ChatCompletionChunk(ChatCompletionChunk::new( + model_id, + system_fingerprint, + current_time, + choices, + None, + )) +} + +enum StreamState { + /// Before the tools was parsed + Buffering, + /// We detected a tool call here + Tool, + /// During the `content` part of the tool call + NoTool, + /// Finishing frames of the ToolCall + NoToolFinish, + /// This is without tool calling + Content, +} + +pub struct ChatState { + state: StreamState, + text: String, + options: StreamOptions, + model_id: String, + fingerprint: String, + logprobs: bool, +} + +impl ChatState { + pub fn new( + using_tools: bool, + options: StreamOptions, + fingerprint: String, + model_id: String, + logprobs: bool, + ) -> Self { + let state = if using_tools { + StreamState::Buffering + } else { + StreamState::Content + }; + let text = String::new(); + Self { + state, + text, + options, + fingerprint, + model_id, + logprobs, + } + } + + pub fn push(&mut self, mut stream_token: StreamResponse) -> Vec { + let mut events = vec![]; + let token_text = &stream_token.token.text; + match self.state { + StreamState::Buffering => { + self.text.push_str(token_text); + // We have a special match for `no_tool` in order to capture directly the `content` + // key which should be re-emitted as raw text. + if let Ok(value) = serde_json::from_str::(&format!("{}\"}}}}", self.text)) { + self.state = StreamState::NoTool; + // Modifiy the content of the token to be whatever was captured by the JSON + stream_token.token.text = value.function.content; + let chat_complete = create_event_from_stream_token( + &stream_token, + self.logprobs, + false, + self.fingerprint.clone(), + self.model_id.clone(), + None, + ); + + events.push(chat_complete); + } + // XXX Caution, here we do not postfix the quote, so that the current output + // Is necessarily finished with quotes for us to be able to parse. + let partial = &self.text; + let partial = partial.trim_end(); + let partial = partial.trim_end_matches(','); + if let Ok(call) = serde_json::from_str::(&format!("{}}}}}", partial)) { + if call.function._name != "no_tool" { + stream_token.token.text = "{".to_string(); + let chat_complete = create_event_from_stream_token( + &stream_token, + self.logprobs, + true, + self.fingerprint.clone(), + self.model_id.clone(), + Some(call.function._name), + ); + + events.push(chat_complete); + self.state = StreamState::Tool; + } + } + } + StreamState::Tool => { + self.text.push_str(token_text); + if serde_json::from_str::(&self.text).is_ok() { + self.state = StreamState::Buffering; + let mut text = stream_token.token.text.trim_end(); + // Effectively trimming only the last closing brace + if text.ends_with('}') { + text = &text[..text.len() - 1]; + } + stream_token.token.text = text.to_string(); + let chat_complete = create_event_from_stream_token( + &stream_token, + self.logprobs, + true, + self.fingerprint.clone(), + self.model_id.clone(), + None, + ); + events.push(chat_complete); + } else { + let chat_complete = create_event_from_stream_token( + &stream_token, + self.logprobs, + true, + self.fingerprint.clone(), + self.model_id.clone(), + None, + ); + + events.push(chat_complete); + } + } + // if we skipped sending the buffer we need to avoid sending the following json key and quotes + // We have remainder tokens, ignore everying, + StreamState::NoToolFinish => {} + StreamState::NoTool => { + self.text.push_str(token_text); + if token_text.contains("\"") || token_text.contains("}") { + let total_text = &self.text; + let total_text = total_text.trim_end(); + let total_text = total_text.trim_end_matches('}'); + let total_text = total_text.trim_end(); + let total_text = total_text.trim_end_matches('"'); + if let Ok(value) = + serde_json::from_str::(&format!("{}\"}}}}", total_text)) + { + if !value.function.content.is_empty() { + let text = token_text.trim_end(); + let text = text.trim_end_matches('}'); + let mut text = text.trim_end(); + // Effectively trim_end_match('"', 1) + // because we do not want to eventually trim finishing escaped quotes + // {{"\"Something\""}} + if text.ends_with("\"") { + text = &text[..text.len() - 1]; + } + stream_token.token.text = text.to_string(); + self.state = StreamState::NoToolFinish; + } + } + } + let chat_complete = create_event_from_stream_token( + &stream_token, + self.logprobs, + false, + self.fingerprint.clone(), + self.model_id.clone(), + None, + ); + + events.push(chat_complete); + } + StreamState::Content => { + let chat_complete = create_event_from_stream_token( + &stream_token, + self.logprobs, + false, + self.fingerprint.clone(), + self.model_id.clone(), + None, + ); + + events.push(chat_complete); + } + } + + if self.options.include_usage { + if let Some(details) = stream_token.details { + let completion_tokens = details.generated_tokens; + let prompt_tokens = details.input_length; + let total_tokens = prompt_tokens + completion_tokens; + + let usage = Usage { + completion_tokens, + prompt_tokens, + total_tokens, + }; + let current_time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_else(|_| std::time::Duration::from_secs(0)) + .as_secs(); + + let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk { + id: String::new(), + created: current_time, + model: self.model_id.clone(), + system_fingerprint: self.fingerprint.clone(), + choices: vec![], + usage: Some(Usage { + prompt_tokens: usage.prompt_tokens, + completion_tokens: usage.completion_tokens, + total_tokens: usage.total_tokens, + }), + }); + + events.push(chat_complete); + } + } + events + } +} + +#[cfg(test)] +mod tests { + use crate::{ + ChatCompletionChoice, ChatCompletionDelta, FinishReason, StreamDetails, TextMessage, Token, + }; + + use super::*; + + #[test] + fn test_chat_stream() { + let mut chat_state = ChatState::new( + false, + StreamOptions { + include_usage: false, + }, + "fingerprint".to_string(), + "model_id".to_string(), + false, + ); + + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: "Hi".to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: None, + }); + assert_eq!(events.len(), 1); + match &events[0] { + CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => { + assert_eq!( + choices, + &[ChatCompletionChoice { + index: 0, + delta: ChatCompletionDelta::Chat(TextMessage { + role: "assistant".to_string(), + content: "Hi".to_string(), + tool_call_id: None, + }), + logprobs: None, + finish_reason: None, + }] + ); + } + _ => panic!("Unexpected chunk"), + } + } + + #[test] + fn test_chat_stream_usage() { + let mut chat_state = ChatState::new( + false, + StreamOptions { + include_usage: true, + }, + "fingerprint".to_string(), + "model_id".to_string(), + false, + ); + + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: "Hi".to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: Some(StreamDetails { + input_length: 2, + generated_tokens: 10, + seed: None, + finish_reason: FinishReason::Length, + }), + }); + assert_eq!(events.len(), 2); + match &events[0] { + CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => { + assert_eq!( + choices, + &[ChatCompletionChoice { + index: 0, + delta: ChatCompletionDelta::Chat(TextMessage { + role: "assistant".to_string(), + content: "Hi".to_string(), + tool_call_id: None, + }), + logprobs: None, + // HAS A FINISH REASON + finish_reason: Some("length".to_string()), + }] + ); + } + _ => panic!("Unexpected chunk"), + } + match &events[1] { + CompletionType::ChatCompletionChunk(ChatCompletionChunk { usage, .. }) => { + assert_eq!( + *usage, + Some(Usage { + prompt_tokens: 2, + completion_tokens: 10, + total_tokens: 12, + }) + ); + } + _ => panic!("Unexpected chunk"), + } + } + + #[test] + fn test_chat_stream_tool_no_tool() { + let mut chat_state = ChatState::new( + true, + StreamOptions { + include_usage: true, + }, + "fingerprint".to_string(), + "model_id".to_string(), + false, + ); + + let tokens = vec![ + "{\"".to_string(), + "function".to_string(), + "\":".to_string(), + " {\"".to_string(), + "_".to_string(), + "name".to_string(), + "\":".to_string(), + " \"".to_string(), + "no".to_string(), + "_tool".to_string(), + "\",".to_string(), + " \"".to_string(), + "content".to_string(), + "\":".to_string(), + " \"".to_string(), // Token 14 + "I".to_string(), // Event 1 + " am".to_string(), // Event 2 + " a".to_string(), // Event 3 + " helpful".to_string(), // Event 4 + " assistant".to_string(), // Event 5 + "!\"".to_string(), // Event 6 (with trailing quore removed) + "}".to_string(), + "}".to_string(), + ]; + + // Initial ignored output + for text in &tokens[..14] { + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: text.to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: None, + }); + assert_eq!(events.len(), 0); + } + + // No tool output + let mut output = String::new(); + for text in &tokens[14..14 + 7] { + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: text.to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: None, + }); + assert_eq!(events.len(), 1); + match &events[0] { + CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => { + assert_eq!(choices.len(), 1); + if let ChatCompletionChoice { + delta: ChatCompletionDelta::Chat(TextMessage { content, .. }), + .. + } = &choices[0] + { + output.push_str(content); + } else { + panic!("Expected plain message"); + } + } + _ => panic!("Unexpected chunk"), + } + } + + assert_eq!(output, "I am a helpful assistant!"); + + // No tool finish + for text in &tokens[14 + 7..] { + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: text.to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: None, + }); + assert_eq!(events.len(), 0); + } + } + + #[test] + fn test_chat_stream_tool_get_weather() { + let mut chat_state = ChatState::new( + true, + StreamOptions { + include_usage: true, + }, + "fingerprint".to_string(), + "model_id".to_string(), + false, + ); + + let tokens = vec![ + "{\"".to_string(), + "function".to_string(), + "\":".to_string(), + " {\"".to_string(), + "_".to_string(), + "name".to_string(), + "\":".to_string(), + " \"".to_string(), + "get".to_string(), + "_current".to_string(), + "_weather".to_string(), + "\",".to_string(), + // Event 1 is the function name + // Event 2 is the start of the arguments "{" + " \"".to_string(), // Event 3 + "location".to_string(), // Event 4 + "\":".to_string(), // Event 5 + " \"".to_string(), // Event 6 + "San".to_string(), // Event 7 + " Francisco".to_string(), // Event 8 + ",".to_string(), // Event 9 + " CA".to_string(), // Event 10 + "\",".to_string(), // Event 11 + " \"".to_string(), // Event 12 + "format".to_string(), // Event 13 + "\":".to_string(), // Event 14 + " \"".to_string(), // Event 15 + "c".to_string(), // Event 16 + "elsius".to_string(), // Event 17 + "\"}}".to_string(), // Event 18 retained (trailing brace removed) + ]; + + // Initial ignored output + for text in &tokens[..11] { + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: text.to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: None, + }); + assert_eq!(events.len(), 0, "{events:?}"); + } + + // No tool output + let mut output = String::new(); + let mut output_name = String::new(); + for text in &tokens[11..11 + 17] { + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: text.to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: None, + }); + assert_eq!(events.len(), 1); + match &events[0] { + CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => { + assert_eq!(choices.len(), 1); + if let ChatCompletionChoice { + delta: ChatCompletionDelta::Tool(ToolCallDelta { tool_calls, .. }), + .. + } = &choices[0] + { + assert_eq!(tool_calls.len(), 1); + let DeltaToolCall { + index, + id, + r#type, + function, + } = &tool_calls[0]; + assert_eq!(*index, 0); + assert_eq!(id, ""); + assert_eq!(r#type, "function"); + if let Some(name) = &function.name { + assert_eq!(name, "get_current_weather"); + output_name.push_str(&name); + } + output.push_str(&function.arguments); + } else { + panic!("Expected plain message"); + } + } + _ => panic!("Unexpected chunk"), + } + } + + assert_eq!(output_name, "get_current_weather"); + assert_eq!( + output, + "{ \"location\": \"San Francisco, CA\", \"format\": \"celsius\"}" + ); + + // No tool finish + for text in &tokens[11 + 17..] { + let events = chat_state.push(StreamResponse { + generated_text: None, + token: Token { + id: 42, + text: text.to_string(), + logprob: 0.0, + special: false, + }, + top_tokens: vec![], + index: 0, + details: None, + }); + assert_eq!(events.len(), 0); + } + } +} diff --git a/router/src/lib.rs b/router/src/lib.rs index 08c31b64..73792bab 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -8,6 +8,7 @@ pub mod validation; mod kserve; pub mod logging; +mod chat; mod sagemaker; pub mod usage_stats; mod vertex; @@ -541,6 +542,7 @@ pub(crate) struct Chunk { } #[derive(Clone, Deserialize, Serialize, ToSchema)] +#[cfg_attr(test, derive(Debug))] pub(crate) struct ChatCompletion { pub id: String, #[schema(example = "1706270835")] @@ -553,6 +555,7 @@ pub(crate) struct ChatCompletion { } #[derive(Clone, Deserialize, Serialize, ToSchema)] +#[cfg_attr(test, derive(Debug))] pub(crate) struct ChatCompletionComplete { pub index: u32, pub message: OutputMessage, @@ -561,6 +564,7 @@ pub(crate) struct ChatCompletionComplete { } #[derive(Clone, Deserialize, Serialize, ToSchema)] +#[cfg_attr(test, derive(Debug, PartialEq))] pub(crate) struct ChatCompletionLogprobs { content: Vec, } @@ -619,6 +623,7 @@ impl From<(Vec, Vec>)> for ChatCompletionLogprobs { } #[derive(Clone, Deserialize, Serialize, ToSchema)] +#[cfg_attr(test, derive(Debug, PartialEq))] pub(crate) struct ChatCompletionLogprob { token: String, logprob: f32, @@ -626,12 +631,14 @@ pub(crate) struct ChatCompletionLogprob { } #[derive(Clone, Deserialize, Serialize, ToSchema)] +#[cfg_attr(test, derive(Debug, PartialEq))] pub(crate) struct ChatCompletionTopLogprob { token: String, logprob: f32, } #[derive(Clone, Deserialize, Serialize, ToSchema, Default)] +#[cfg_attr(test, derive(Debug, PartialEq))] pub(crate) struct Usage { pub prompt_tokens: u32, pub completion_tokens: u32, @@ -640,6 +647,7 @@ pub(crate) struct Usage { #[derive(Clone, Serialize, ToSchema)] #[serde(tag = "object")] +#[cfg_attr(test, derive(Debug))] enum CompletionType { #[serde(rename = "chat.completion.chunk")] ChatCompletionChunk(ChatCompletionChunk), @@ -707,6 +715,7 @@ impl ChatCompletion { } } #[derive(Clone, Serialize, ToSchema)] +#[cfg_attr(test, derive(Debug))] pub(crate) struct ChatCompletionChunk { pub id: String, #[schema(example = "1706270978")] @@ -719,6 +728,7 @@ pub(crate) struct ChatCompletionChunk { } #[derive(Clone, Serialize, ToSchema)] +#[cfg_attr(test, derive(Debug, PartialEq))] pub(crate) struct ChatCompletionChoice { pub index: u32, pub delta: ChatCompletionDelta, @@ -735,6 +745,7 @@ pub struct ToolCallDelta { #[derive(Clone, Debug, Serialize, ToSchema)] #[serde(untagged)] +#[cfg_attr(test, derive(PartialEq))] enum ChatCompletionDelta { Chat(TextMessage), Tool(ToolCallDelta), @@ -759,48 +770,17 @@ impl ChatCompletionChunk { pub(crate) fn new( model: String, system_fingerprint: String, - delta: Option, - tool_calls: Option>, created: u64, - logprobs: Option, - finish_reason: Option, + choices: Vec, + usage: Option, ) -> Self { - let delta = match (delta, tool_calls) { - (Some(delta), _) => ChatCompletionDelta::Chat(TextMessage { - role: "assistant".to_string(), - content: delta, - ..Default::default() - }), - (None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta { - role: "assistant".to_string(), - tool_calls: vec![DeltaToolCall { - index: 0, - id: String::new(), - r#type: "function".to_string(), - function: Function { - name: None, - arguments: tool_calls[0].to_string(), - }, - }], - }), - (None, None) => ChatCompletionDelta::Chat(TextMessage { - role: "assistant".to_string(), - content: "".to_string(), - ..Default::default() - }), - }; Self { id: String::new(), created, model, system_fingerprint, - choices: vec![ChatCompletionChoice { - index: 0, - delta, - logprobs, - finish_reason, - }], - usage: None, + choices, + usage, } } } @@ -915,7 +895,7 @@ pub(crate) struct ChatRequest { /// Options for streaming response. Only set this when you set stream: true. #[serde(default)] #[schema(nullable = true, example = "null")] - pub stream_options: Option, + pub stream_options: StreamOptions, } impl ChatRequest { @@ -1017,11 +997,12 @@ impl ChatRequest { } } -#[derive(Clone, Deserialize, ToSchema, Serialize)] +#[derive(Clone, Deserialize, ToSchema, Serialize, Default)] #[cfg_attr(test, derive(Debug, PartialEq))] struct StreamOptions { /// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value. #[schema(example = "true")] + #[serde(default)] include_usage: bool, } @@ -1445,7 +1426,7 @@ pub(crate) struct ChatTokenizeResponse { #[serde(transparent)] pub(crate) struct TokenizeResponse(Vec); -#[derive(Serialize, ToSchema)] +#[derive(Serialize, ToSchema, Clone)] pub(crate) struct StreamDetails { #[schema(example = "length")] pub finish_reason: FinishReason, @@ -1457,7 +1438,7 @@ pub(crate) struct StreamDetails { pub input_length: u32, } -#[derive(Serialize, ToSchema)] +#[derive(Serialize, ToSchema, Clone)] pub(crate) struct StreamResponse { pub index: u32, pub token: Token, @@ -1700,9 +1681,25 @@ mod tests { assert!(matches!( request.stream_options, - Some(StreamOptions { + StreamOptions { include_usage: true - }) + } + )); + + let json = json!({ + "model": "", + "messages": [{ + "role": "user", + "content": "Hello" + }] + }); + let request: ChatRequest = serde_json::from_str(json.to_string().as_str()).unwrap(); + + assert!(matches!( + request.stream_options, + StreamOptions { + include_usage: false + } )); } diff --git a/router/src/server.rs b/router/src/server.rs index df9e16ff..824a23bb 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -1,3 +1,4 @@ +use crate::chat::ChatState; /// HTTP Server logic use crate::config::Config; use crate::infer::{Backend, Infer, InferError, InferResponse, InferStreamResponse}; @@ -47,8 +48,6 @@ use http::header::AUTHORIZATION; use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle}; use pyo3::prelude::*; use pyo3::types::IntoPyDict; -use regex::Regex; -use serde_json::Value; use std::convert::Infallible; use std::fs::File; use std::io::BufReader; @@ -1114,62 +1113,6 @@ pub(crate) async fn completions( } } -enum StreamState { - Buffering, - BufferTrailing, - Content { skip_close_quote: bool }, -} - -/// Convert a StreamResponse into an Event to be sent over SSE -fn create_event_from_stream_token( - stream_token: &StreamResponse, - logprobs: bool, - inner_using_tools: bool, - system_fingerprint: String, - model_id: String, -) -> Event { - let event = Event::default(); - let current_time = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_else(|_| std::time::Duration::from_secs(0)) - .as_secs(); - - let logprobs = logprobs.then(|| { - ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens.clone())) - }); - - // replace the content with the tool calls if grammar is present - let (content, tool_calls) = if inner_using_tools { - (None, Some(vec![stream_token.token.text.clone()])) - } else { - let content = if !stream_token.token.special { - Some(stream_token.token.text.clone()) - } else { - None - }; - - (content, None) - }; - let finish_reason = stream_token - .details - .as_ref() - .map(|details| details.finish_reason.format(true)); - let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new( - model_id.clone(), - system_fingerprint.clone(), - content, - tool_calls, - current_time, - logprobs, - finish_reason, - )); - - event.json_data(chat_complete).unwrap_or_else(|e| { - println!("Failed to serialize ChatCompletionChunk: {:?}", e); - Event::default() - }) -} - /// Generate tokens #[utoipa::path( post, @@ -1235,164 +1178,18 @@ pub(crate) async fn chat_completions( let (headers, response_stream) = generate_stream_internal(infer, compute_type, Json(generate_request), span).await; - // regex to match any function name - let function_regex = match Regex::new(r#"\{"function":\{"_name":"([^"]+)""#) { - Ok(regex) => regex, - Err(e) => { - return Err(( - StatusCode::INTERNAL_SERVER_ERROR, - Json(ErrorResponse { - error: format!("Failed to compile regex: {}", e), - error_type: "regex".to_string(), - }), - )) - } - }; - let response_stream = async_stream::stream! { let mut response_stream = Box::pin(response_stream); - let mut buffer = Vec::new(); - let mut json_buffer = String::new(); - let mut state = if using_tools { - StreamState::Buffering - } else { - StreamState::Content { - skip_close_quote: false, - } - }; - let mut response_as_tool = using_tools; + let mut state = ChatState::new(using_tools, stream_options, system_fingerprint, model_id, logprobs); while let Some(result) = response_stream.next().await { match result{ Ok(stream_token) => { - let token_text = &stream_token.token.text.clone(); - let usage = stream_token.details.as_ref().map(|details| { - let completion_tokens = details.generated_tokens; - let prompt_tokens = details.input_length; - let total_tokens = prompt_tokens + completion_tokens; - - Usage { - completion_tokens, - prompt_tokens, - total_tokens, - } - }); - match state { - StreamState::Buffering => { - json_buffer.push_str(&token_text.replace(" ", "")); - buffer.push(stream_token); - if let Some(captures) = function_regex.captures(&json_buffer) { - let function_name = captures[1].to_string(); - if function_name == "no_tool" { - state = StreamState::BufferTrailing; - response_as_tool = false; - buffer.clear(); - json_buffer.clear(); - } else { - state = StreamState::Content { - skip_close_quote: false, - }; - // send all the buffered messages - for stream_token in &buffer { - let event = create_event_from_stream_token( - stream_token, - logprobs, - response_as_tool, - system_fingerprint.clone(), - model_id.clone(), - ); - yield Ok::(event); - } - } - } - } - // if we skipped sending the buffer we need to avoid sending the following json key and quotes - StreamState::BufferTrailing => { - let infix_text = "\"content\":\""; - json_buffer.push_str(&token_text.replace(" ", "")); - // keep capturing until we find the infix text - match json_buffer.find(infix_text) { - Some(content_key_index) => { - json_buffer = - json_buffer[content_key_index + infix_text.len()..].to_string(); - } - None => { - continue; - } - } - // if there is leftover text after removing the infix text, we need to send it - if !json_buffer.is_empty() { - let event = Event::default(); - let current_time = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_else(|_| std::time::Duration::from_secs(0)) - .as_secs(); - let chat_complete = - CompletionType::ChatCompletionChunk(ChatCompletionChunk::new( - model_id.clone(), - system_fingerprint.clone(), - Some(json_buffer.clone()), - None, - current_time, - None, - None, - )); - yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| { - InferError::StreamSerializationError(e.to_string()).into() - })); - } - // cleanup the buffers - buffer.clear(); - json_buffer.clear(); - state = StreamState::Content { - skip_close_quote: true, - }; - } - StreamState::Content { skip_close_quote } => { - if skip_close_quote && token_text.contains('"') { - break; - } - - // send the content - let event = create_event_from_stream_token( - &stream_token, - logprobs, - response_as_tool, - system_fingerprint.clone(), - model_id.clone(), - ); - - yield Ok::(event); - } - } - - let should_send_usage = usage.is_some() - && stream_options - .as_ref() - .is_some_and(|opts| opts.include_usage); - - if should_send_usage { - let usage_data = usage.unwrap(); - let current_time = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_else(|_| std::time::Duration::from_secs(0)) - .as_secs(); - - let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk { - id: String::new(), - created: current_time, - model: model_id.clone(), - system_fingerprint: system_fingerprint.clone(), - choices: vec![], - usage: Some(Usage { - prompt_tokens: usage_data.prompt_tokens, - completion_tokens: usage_data.completion_tokens, - total_tokens: usage_data.total_tokens, - }), - }); - - yield Ok(Event::default() - .json_data(chat_complete) - .unwrap_or_else(|e| InferError::StreamSerializationError(e.to_string()).into())); + let events = state.push(stream_token); + for chat_complete in events{ + yield Ok(Event::default().json_data(chat_complete).unwrap_or_else(|e| { + tracing::error!("Failed to serialize ChatCompletionChunk: {:?}", e); + Event::default() + })); } } Err(err) => yield Ok(err.into_openai_event()) @@ -1413,56 +1210,7 @@ pub(crate) async fn chat_completions( .as_secs(); let (tool_calls, output) = if using_tools { - let gen_text_value: Value = - serde_json::from_str(&generation.generated_text).map_err(|e| { - InferError::ToolError(format!( - "Failed to parse generated text: {} {:?}", - e, generation.generated_text - )) - })?; - let function = gen_text_value.get("function").ok_or(InferError::ToolError( - "No function found in generated text".to_string(), - ))?; - - let name = function - .get("_name") - .and_then(Value::as_str) - .ok_or(InferError::ToolError( - "No _name found in generated text".to_string(), - ))? - .to_string(); - - let mut arguments = function.clone(); - if let Value::Object(ref mut props) = arguments { - props.remove("_name"); - } - match name.as_str() { - "no_tool" => { - // parse the content message - let content_message = arguments - .get("content") - .and_then(Value::as_str) - .ok_or_else(|| { - InferError::ToolError( - "No `content` found in generated text".to_string(), - ) - })? - .to_string(); - (None, Some(content_message)) - } - _ => { - let tool_calls = vec![ToolCall { - id: "0".to_string(), - r#type: "function".to_string(), - function: FunctionDefinition { - description: None, - name, - arguments, - }, - }]; - (Some(tool_calls), None) - } - } + crate::chat::parse_output(&generation.generated_text)? } else { (None, Some(generation.generated_text)) };