Fixing the tool calling convention.

This commit is contained in:
Nicolas Patry 2025-03-07 19:42:36 +01:00
parent b447f7e821
commit 207a70e7be
No known key found for this signature in database
GPG Key ID: 4242CEF24CB6DBF9
12 changed files with 1045 additions and 2442 deletions

View File

@ -10,7 +10,7 @@
"tool_calls": [
{
"function": {
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}",
"arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
"description": null,
"name": "get_current_weather"
},
@ -21,7 +21,7 @@
}
}
],
"created": 1741263682,
"created": 1741372434,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion",

View File

@ -10,7 +10,7 @@
"tool_calls": [
{
"function": {
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}",
"arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
"description": null,
"name": "get_current_weather"
},
@ -21,7 +21,7 @@
}
}
],
"created": 1741263684,
"created": 1741372657,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion",

View File

@ -8,8 +8,8 @@
"tool_calls": [
{
"function": {
"arguments": "{\"",
"name": null
"arguments": "{",
"name": "get_current_weather"
},
"id": "",
"index": 0,
@ -22,187 +22,7 @@
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "function",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " {\"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "name",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -232,157 +52,7 @@
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "get",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_current",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_weather",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\",",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -412,7 +82,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -442,7 +112,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -472,7 +142,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -488,7 +158,7 @@
"tool_calls": [
{
"function": {
"arguments": "Paris",
"arguments": "Bro",
"name": null
},
"id": "",
@ -502,7 +172,37 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "oklyn",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -532,7 +232,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -548,7 +248,7 @@
"tool_calls": [
{
"function": {
"arguments": " France",
"arguments": " NY",
"name": null
},
"id": "",
@ -562,7 +262,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -592,7 +292,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -622,7 +322,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -652,7 +352,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -682,7 +382,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371719,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -712,7 +412,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371720,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -728,7 +428,7 @@
"tool_calls": [
{
"function": {
"arguments": "c",
"arguments": "f",
"name": null
},
"id": "",
@ -742,7 +442,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371720,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -758,7 +458,7 @@
"tool_calls": [
{
"function": {
"arguments": "elsius",
"arguments": "ahrenheit",
"name": null
},
"id": "",
@ -772,7 +472,7 @@
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371720,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -788,7 +488,7 @@
"tool_calls": [
{
"function": {
"arguments": "\"}}",
"arguments": "\"}",
"name": null
},
"id": "",
@ -802,37 +502,7 @@
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "<|eot_id|>",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"created": 1741371720,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",

View File

@ -1,4 +1,24 @@
[
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
@ -12,7 +32,7 @@
"logprobs": null
}
],
"created": 1741263687,
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -32,7 +52,7 @@
"logprobs": null
}
],
"created": 1741263687,
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -52,7 +72,7 @@
"logprobs": null
}
],
"created": 1741263687,
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -72,7 +92,7 @@
"logprobs": null
}
],
"created": 1741263687,
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -92,7 +112,27 @@
"logprobs": null
}
],
"created": 1741263687,
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "!",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",

View File

@ -10,7 +10,7 @@
"tool_calls": [
{
"function": {
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}",
"arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
"description": null,
"name": "get_current_weather"
},
@ -21,7 +21,7 @@
}
}
],
"created": 1741263680,
"created": 1741372335,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion",

View File

@ -10,8 +10,8 @@
"tool_calls": [
{
"function": {
"arguments": "{\"",
"name": null
"arguments": "{",
"name": "get_current_weather"
},
"id": "",
"index": 0,
@ -24,205 +24,7 @@
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "function",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " {\"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "name",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -255,172 +57,7 @@
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "get",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_current",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_weather",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\",",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -453,7 +90,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -486,7 +123,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -519,7 +156,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -552,7 +189,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -585,7 +222,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -618,7 +255,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -651,7 +288,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -684,7 +321,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -717,7 +354,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370464,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -750,7 +387,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370465,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -783,7 +420,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370465,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -816,7 +453,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370465,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -849,7 +486,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370465,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -882,7 +519,7 @@
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370465,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -901,7 +538,7 @@
"tool_calls": [
{
"function": {
"arguments": "\"}}",
"arguments": "\"}",
"name": null
},
"id": "",
@ -915,40 +552,7 @@
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "<|eot_id|>",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"created": 1741370465,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",

View File

@ -1,4 +1,24 @@
[
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
@ -12,7 +32,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -32,7 +52,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -52,7 +72,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -72,7 +92,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -92,7 +112,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -112,7 +132,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -132,7 +152,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -152,7 +172,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -172,7 +192,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -192,7 +212,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -212,7 +232,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -232,7 +252,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -252,7 +272,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -272,7 +292,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -292,7 +312,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -312,7 +332,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -332,7 +352,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -352,7 +372,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -372,7 +392,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -392,7 +412,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -412,7 +432,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -432,7 +452,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -452,7 +472,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -472,7 +492,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -492,7 +512,7 @@
"logprobs": null
}
],
"created": 1741263688,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -512,7 +532,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -532,7 +552,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -552,7 +572,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -572,7 +592,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -592,7 +612,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -612,7 +632,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -632,7 +652,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -652,7 +672,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -672,7 +692,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -692,7 +712,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -712,7 +732,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -732,7 +752,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -752,7 +772,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -772,7 +792,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -792,7 +812,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -812,7 +832,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -832,7 +852,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -852,7 +872,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -872,7 +892,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -892,7 +912,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -912,7 +932,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -932,7 +952,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371723,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -952,7 +972,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -972,7 +992,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -992,7 +1012,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1012,7 +1032,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1032,7 +1052,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1052,7 +1072,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1072,7 +1092,7 @@
"logprobs": null
}
],
"created": 1741263689,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1092,7 +1112,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1112,7 +1132,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1132,7 +1152,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1152,7 +1172,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1172,7 +1192,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1192,7 +1212,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1212,7 +1232,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1232,7 +1252,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1252,7 +1272,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1272,7 +1292,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1292,7 +1312,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1312,7 +1332,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1332,7 +1352,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1352,7 +1372,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1372,7 +1392,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1392,7 +1412,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1412,7 +1432,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1432,7 +1452,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1452,7 +1472,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1472,7 +1492,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1492,7 +1512,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1512,7 +1532,7 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
@ -1532,7 +1552,27 @@
"logprobs": null
}
],
"created": 1741263690,
"created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": ".",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741371725,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",

View File

@ -108,7 +108,7 @@ async def test_flash_llama_grammar_tools_nostream(
function=ChatCompletionOutputFunctionDefinition(
description=None,
name="get_current_weather",
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}',
arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
),
)
]
@ -142,14 +142,15 @@ async def test_flash_llama_grammar_tools_openai(
chunks = []
tool = ""
name = ""
for chunk in stream:
if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
tool += chunk.choices[0].delta.tool_calls[0].function.arguments
chunks.append(chunk)
assert (
tool
== '{"function": {"_name": "get_current_weather", "location": "Brooklyn, NY", "format": "fahrenheit"}}<|eot_id|>'
)
assert name == "get_current_weather"
assert tool == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}'
assert chunks == response_snapshot
@ -184,7 +185,7 @@ async def test_flash_llama_grammar_tools_auto_nostream(
function=ChatCompletionOutputFunctionDefinition(
description=None,
name="get_current_weather",
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}',
arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
),
)
]
@ -223,7 +224,7 @@ async def test_flash_llama_grammar_tools_choice_nostream(
function=ChatCompletionOutputFunctionDefinition(
description=None,
name="get_current_weather",
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}',
arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
),
)
]
@ -250,23 +251,24 @@ async def test_flash_llama_grammar_tools_choice_stream(
},
{
"role": "user",
"content": "What is the weather like in Paris, France?",
"content": "What is the weather like in Brooklyn, New York?",
},
],
stream=True,
)
tool_calls_generated = ""
arguments = ""
chunks = []
name = ""
for chunk in stream:
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
arguments += chunk.choices[0].delta.tool_calls[0].function.arguments
assert chunk.choices[0].delta.content is None
chunks.append(chunk)
assert (
tool_calls_generated
== '{"function": {"_name": "get_current_weather", "location": "Paris, France", "format": "celsius"}}<|eot_id|>'
)
assert name == "get_current_weather"
assert arguments == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}'
assert chunks == response_snapshot
@ -297,8 +299,6 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream(
content_generated = response.choices[0].message.content
assert response.choices[0].message.tool_calls is None
######## FIXME before MERGE ############################
# TODO This is different from the streaming case, this is NOT normal.
assert content_generated == "I am a helpful assistant!"
assert response == response_snapshot
@ -334,7 +334,8 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream(
chunks.append(chunk)
assert chunk.choices[0].delta.tool_calls is None
assert content_generated == "I am a helpful assistant"
######## This is exactly the same as the non streaming case
assert content_generated == "I am a helpful assistant!"
assert chunks == response_snapshot
@ -371,7 +372,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto(
assert (
content_generated
== "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle"
== "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle."
)
assert chunks == response_snapshot
@ -401,14 +402,18 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
)
tool_calls_generated = ""
name = ""
chunks = []
for chunk in stream:
assert chunk.choices[0].delta.content is None
if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
assert name == "get_n_day_weather_forecast"
assert (
tool_calls_generated
== '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}}<|eot_id|>'
== '{ "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}'
)
assert chunks == response_snapshot
@ -479,12 +484,17 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
)
chunks = []
tool_calls_generated = ""
name = ""
for chunk in stream:
assert chunk.choices[0].delta.content is None
if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
chunks.append(chunk)
assert name == "get_n_day_weather_forecast"
assert (
tool_calls_generated
== '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days": 3}}<|eot_id|>'
== '{ "location": "San Francisco, CA", "format": "celsius", "num_days": 3}'
)
assert chunks == response_snapshot

725
router/src/chat.rs Normal file
View File

@ -0,0 +1,725 @@
use crate::{
infer::InferError, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionDelta,
ChatCompletionLogprobs, CompletionType, DeltaToolCall, Function, FunctionDefinition,
StreamOptions, StreamResponse, TextMessage, ToolCallDelta, Usage,
};
use serde::Deserialize;
use serde_json::Value;
/// Marker type that only deserializes from the literal string `"no_tool"`
/// (via the `snake_case` rename), so parsing a [`NoTool`] can only succeed
/// on the dedicated no-tool grammar output.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum _NoTool {
    NoTool,
}

/// Inner payload of the `no_tool` escape hatch: the model's plain-text
/// answer is carried in `content`.
#[derive(Debug, Deserialize)]
struct NoToolCall {
    // Must be exactly "no_tool" for deserialization to succeed.
    _name: _NoTool,
    content: String,
}

/// Wrapper matching the grammar's top-level `{"function": {...}}` shape
/// for the no-tool case.
#[derive(Debug, Deserialize)]
struct NoTool {
    function: NoToolCall,
}

/// A real tool invocation: `_name` is the tool name, every other key in the
/// object is captured as an argument via `#[serde(flatten)]`.
#[derive(Debug, Deserialize)]
struct ToolCall {
    _name: String,
    #[serde(flatten, default)]
    /// Using Map to preserve order
    arguments: serde_json::Map<String, Value>,
}

/// Wrapper matching the grammar's top-level `{"function": {...}}` shape
/// for the tool-call case.
#[derive(Debug, Deserialize)]
struct Call {
    function: ToolCall,
}
pub(crate) fn parse_output(
generated_text: &str,
) -> Result<(Option<Vec<crate::ToolCall>>, Option<String>), InferError> {
let call: Call = serde_json::from_str(generated_text).map_err(|e| {
InferError::ToolError(format!(
"Failed to parse generated text: {} {:?}",
e, generated_text
))
})?;
let name = call.function._name;
match &name[..] {
"no_tool" => {
// parse the content message
let content_message = call
.function
.arguments
.get("content")
.and_then(Value::as_str)
.ok_or_else(|| {
InferError::ToolError("No `content` found in generated text".to_string())
})?
.to_string();
Ok((None, Some(content_message)))
}
name => {
let tool_calls = vec![crate::ToolCall {
id: "0".to_string(),
r#type: "function".to_string(),
function: FunctionDefinition {
description: None,
name: name.to_string(),
arguments: serde_json::to_value(call.function.arguments).map_err(|err| {
InferError::ToolError(format!(
"Could not convert arguments to JSON map {err}"
))
})?,
},
}];
Ok((Some(tool_calls), None))
}
}
}
/// Convert a StreamResponse into an Event to be sent over SSE
///
/// When `inner_using_tools` is set, the token text is emitted as a
/// tool-call arguments delta (with `function_name` attached on the first
/// frame); otherwise it is emitted as plain chat content. Special tokens
/// produce an empty chat delta either way.
fn create_event_from_stream_token(
    stream_token: &StreamResponse,
    logprobs: bool,
    inner_using_tools: bool,
    system_fingerprint: String,
    model_id: String,
    function_name: Option<String>,
) -> CompletionType {
    // Seconds since the Unix epoch; fall back to 0 if the clock is before it.
    let current_time = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |d| d.as_secs());

    let logprobs = logprobs.then(|| {
        ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens.clone()))
    });

    // Special tokens (e.g. end-of-text) are never surfaced as text.
    let token_text = (!stream_token.token.special).then(|| stream_token.token.text.clone());

    let finish_reason = stream_token
        .details
        .as_ref()
        .map(|details| details.finish_reason.format(true));

    let delta = match token_text {
        // Tool mode: the token text is a fragment of the arguments JSON.
        Some(arguments) if inner_using_tools => ChatCompletionDelta::Tool(ToolCallDelta {
            role: "assistant".to_string(),
            tool_calls: vec![DeltaToolCall {
                index: 0,
                id: String::new(),
                r#type: "function".to_string(),
                function: Function {
                    name: function_name,
                    arguments,
                },
            }],
        }),
        // Plain chat content.
        Some(content) => ChatCompletionDelta::Chat(TextMessage {
            role: "assistant".to_string(),
            content,
            ..Default::default()
        }),
        // Special token: keep the frame (it may carry the finish reason)
        // but with empty content.
        None => ChatCompletionDelta::Chat(TextMessage {
            role: "assistant".to_string(),
            content: "".to_string(),
            ..Default::default()
        }),
    };

    let choices = vec![ChatCompletionChoice {
        index: 0,
        delta,
        logprobs,
        finish_reason,
    }];
    CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
        model_id,
        system_fingerprint,
        current_time,
        choices,
        None,
    ))
}
/// State machine for incrementally re-emitting the tool-call grammar
/// output as OpenAI-style streaming deltas.
enum StreamState {
    /// Before the tool (or `no_tool`) name was parsed; tokens are buffered
    Buffering,
    /// We detected a tool call here; argument tokens stream out as deltas
    Tool,
    /// During the `content` part of a `no_tool` call; re-emitted as raw text
    NoTool,
    /// Finishing frames of the `no_tool` call (closing quote/braces), dropped
    NoToolFinish,
    /// This is without tool calling; plain content passthrough
    Content,
}
/// Incremental converter from raw generation stream tokens to chat
/// completion chunks, aware of the tool-call grammar.
pub struct ChatState {
    /// Current position in the tool-parsing state machine
    state: StreamState,
    /// All raw token text accumulated so far, used for partial-JSON probing
    text: String,
    /// Request stream options (e.g. whether to append a usage chunk)
    options: StreamOptions,
    /// Model id echoed into every emitted chunk
    model_id: String,
    /// System fingerprint echoed into every emitted chunk
    fingerprint: String,
    /// Whether to attach logprobs to each chunk
    logprobs: bool,
}
impl ChatState {
    /// Build a new stream converter. `using_tools` selects the initial
    /// state: tool-grammar buffering vs. plain content passthrough.
    pub fn new(
        using_tools: bool,
        options: StreamOptions,
        fingerprint: String,
        model_id: String,
        logprobs: bool,
    ) -> Self {
        let state = if using_tools {
            StreamState::Buffering
        } else {
            StreamState::Content
        };
        let text = String::new();
        Self {
            state,
            text,
            options,
            fingerprint,
            model_id,
            logprobs,
        }
    }

    /// Feed one raw stream token, returning zero or more chunks to emit.
    /// Depending on the state, a token may be buffered (no event),
    /// rewritten (tool name/argument deltas, unwrapped `no_tool` content)
    /// or dropped (closing frames of a `no_tool` payload).
    pub fn push(&mut self, mut stream_token: StreamResponse) -> Vec<CompletionType> {
        let mut events = vec![];
        let token_text = &stream_token.token.text;
        match self.state {
            StreamState::Buffering => {
                self.text.push_str(token_text);
                // We have a special match for `no_tool` in order to capture directly the `content`
                // key which should be re-emitted as raw text.
                if let Ok(value) = serde_json::from_str::<NoTool>(&format!("{}\"}}}}", self.text)) {
                    self.state = StreamState::NoTool;
                    // Modify the content of the token to be whatever was captured by the JSON
                    stream_token.token.text = value.function.content;
                    let chat_complete = create_event_from_stream_token(
                        &stream_token,
                        self.logprobs,
                        false,
                        self.fingerprint.clone(),
                        self.model_id.clone(),
                        None,
                    );
                    events.push(chat_complete);
                }
                // XXX Caution, here we do not postfix the quote, so that the current output
                // Is necessarily finished with quotes for us to be able to parse.
                let partial = &self.text;
                let partial = partial.trim_end();
                let partial = partial.trim_end_matches(',');
                if let Ok(call) = serde_json::from_str::<Call>(&format!("{}}}}}", partial)) {
                    if call.function._name != "no_tool" {
                        // First tool event: emit the function name together
                        // with the opening brace of the arguments object.
                        stream_token.token.text = "{".to_string();
                        let chat_complete = create_event_from_stream_token(
                            &stream_token,
                            self.logprobs,
                            true,
                            self.fingerprint.clone(),
                            self.model_id.clone(),
                            Some(call.function._name),
                        );
                        events.push(chat_complete);
                        self.state = StreamState::Tool;
                    }
                }
            }
            StreamState::Tool => {
                self.text.push_str(token_text);
                if serde_json::from_str::<Call>(&self.text).is_ok() {
                    // The accumulated text now parses as a full call: this
                    // token closes the outer JSON wrapper, so strip that
                    // extra brace before emitting the final arguments delta.
                    self.state = StreamState::Buffering;
                    let mut text = stream_token.token.text.trim_end();
                    // Effectively trimming only the last closing brace
                    if text.ends_with('}') {
                        text = &text[..text.len() - 1];
                    }
                    stream_token.token.text = text.to_string();
                    let chat_complete = create_event_from_stream_token(
                        &stream_token,
                        self.logprobs,
                        true,
                        self.fingerprint.clone(),
                        self.model_id.clone(),
                        None,
                    );
                    events.push(chat_complete);
                } else {
                    // Mid-arguments: forward the token verbatim as a delta.
                    let chat_complete = create_event_from_stream_token(
                        &stream_token,
                        self.logprobs,
                        true,
                        self.fingerprint.clone(),
                        self.model_id.clone(),
                        None,
                    );
                    events.push(chat_complete);
                }
            }
            // if we skipped sending the buffer we need to avoid sending the following json key and quotes
            // We have remainder tokens, ignore everything,
            StreamState::NoToolFinish => {}
            StreamState::NoTool => {
                self.text.push_str(token_text);
                if token_text.contains("\"") || token_text.contains("}") {
                    // A quote or brace may mean the `content` string is
                    // closing: probe by re-completing the JSON and parsing.
                    let total_text = &self.text;
                    let total_text = total_text.trim_end();
                    let total_text = total_text.trim_end_matches('}');
                    let total_text = total_text.trim_end();
                    let total_text = total_text.trim_end_matches('"');
                    if let Ok(value) =
                        serde_json::from_str::<NoTool>(&format!("{}\"}}}}", total_text))
                    {
                        if !value.function.content.is_empty() {
                            let text = token_text.trim_end();
                            let text = text.trim_end_matches('}');
                            let mut text = text.trim_end();
                            // Effectively trim_end_match('"', 1)
                            // because we do not want to eventually trim finishing escaped quotes
                            // {{"\"Something\""}}
                            if text.ends_with("\"") {
                                text = &text[..text.len() - 1];
                            }
                            stream_token.token.text = text.to_string();
                            self.state = StreamState::NoToolFinish;
                        }
                    }
                }
                let chat_complete = create_event_from_stream_token(
                    &stream_token,
                    self.logprobs,
                    false,
                    self.fingerprint.clone(),
                    self.model_id.clone(),
                    None,
                );
                events.push(chat_complete);
            }
            StreamState::Content => {
                // No tool grammar in play: plain passthrough of the token.
                let chat_complete = create_event_from_stream_token(
                    &stream_token,
                    self.logprobs,
                    false,
                    self.fingerprint.clone(),
                    self.model_id.clone(),
                    None,
                );
                events.push(chat_complete);
            }
        }
        if self.options.include_usage {
            // When requested, append a trailing usage-only chunk once the
            // final details (and thus token counts) are available.
            if let Some(details) = stream_token.details {
                let completion_tokens = details.generated_tokens;
                let prompt_tokens = details.input_length;
                let total_tokens = prompt_tokens + completion_tokens;
                let usage = Usage {
                    completion_tokens,
                    prompt_tokens,
                    total_tokens,
                };
                let current_time = std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_else(|_| std::time::Duration::from_secs(0))
                    .as_secs();
                let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
                    id: String::new(),
                    created: current_time,
                    model: self.model_id.clone(),
                    system_fingerprint: self.fingerprint.clone(),
                    choices: vec![],
                    usage: Some(Usage {
                        prompt_tokens: usage.prompt_tokens,
                        completion_tokens: usage.completion_tokens,
                        total_tokens: usage.total_tokens,
                    }),
                });
                events.push(chat_complete);
            }
        }
        events
    }
}
#[cfg(test)]
mod tests {
    use crate::{
        ChatCompletionChoice, ChatCompletionDelta, FinishReason, StreamDetails, TextMessage, Token,
    };

    use super::*;

    /// Plain streaming (no tools): one pushed token produces exactly one
    /// chat delta carrying that token's text.
    #[test]
    fn test_chat_stream() {
        let mut chat_state = ChatState::new(
            false,
            StreamOptions {
                include_usage: false,
            },
            "fingerprint".to_string(),
            "model_id".to_string(),
            false,
        );
        let events = chat_state.push(StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: "Hi".to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details: None,
        });
        assert_eq!(events.len(), 1);
        match &events[0] {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                assert_eq!(
                    choices,
                    &[ChatCompletionChoice {
                        index: 0,
                        delta: ChatCompletionDelta::Chat(TextMessage {
                            role: "assistant".to_string(),
                            content: "Hi".to_string(),
                            tool_call_id: None,
                        }),
                        logprobs: None,
                        finish_reason: None,
                    }]
                );
            }
            _ => panic!("Unexpected chunk"),
        }
    }

    /// With `include_usage`, a token carrying final details yields the
    /// regular delta (with its finish reason) plus a usage-only chunk.
    #[test]
    fn test_chat_stream_usage() {
        let mut chat_state = ChatState::new(
            false,
            StreamOptions {
                include_usage: true,
            },
            "fingerprint".to_string(),
            "model_id".to_string(),
            false,
        );
        let events = chat_state.push(StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: "Hi".to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details: Some(StreamDetails {
                input_length: 2,
                generated_tokens: 10,
                seed: None,
                finish_reason: FinishReason::Length,
            }),
        });
        assert_eq!(events.len(), 2);
        match &events[0] {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                assert_eq!(
                    choices,
                    &[ChatCompletionChoice {
                        index: 0,
                        delta: ChatCompletionDelta::Chat(TextMessage {
                            role: "assistant".to_string(),
                            content: "Hi".to_string(),
                            tool_call_id: None,
                        }),
                        logprobs: None,
                        // HAS A FINISH REASON
                        finish_reason: Some("length".to_string()),
                    }]
                );
            }
            _ => panic!("Unexpected chunk"),
        }
        match &events[1] {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { usage, .. }) => {
                assert_eq!(
                    *usage,
                    Some(Usage {
                        prompt_tokens: 2,
                        completion_tokens: 10,
                        total_tokens: 12,
                    })
                );
            }
            _ => panic!("Unexpected chunk"),
        }
    }

    /// A `no_tool` generation: the JSON scaffolding is swallowed and only
    /// the `content` value is re-emitted as plain chat text.
    #[test]
    fn test_chat_stream_tool_no_tool() {
        let mut chat_state = ChatState::new(
            true,
            StreamOptions {
                include_usage: true,
            },
            "fingerprint".to_string(),
            "model_id".to_string(),
            false,
        );
        let tokens = vec![
            "{\"".to_string(),
            "function".to_string(),
            "\":".to_string(),
            " {\"".to_string(),
            "_".to_string(),
            "name".to_string(),
            "\":".to_string(),
            " \"".to_string(),
            "no".to_string(),
            "_tool".to_string(),
            "\",".to_string(),
            " \"".to_string(),
            "content".to_string(),
            "\":".to_string(),
            " \"".to_string(),      // Token 14
            "I".to_string(),        // Event 1
            " am".to_string(),      // Event 2
            " a".to_string(),       // Event 3
            " helpful".to_string(), // Event 4
            " assistant".to_string(), // Event 5
            "!\"".to_string(),      // Event 6 (with trailing quote removed)
            "}".to_string(),
            "}".to_string(),
        ];
        // Initial ignored output
        for text in &tokens[..14] {
            let events = chat_state.push(StreamResponse {
                generated_text: None,
                token: Token {
                    id: 42,
                    text: text.to_string(),
                    logprob: 0.0,
                    special: false,
                },
                top_tokens: vec![],
                index: 0,
                details: None,
            });
            assert_eq!(events.len(), 0);
        }
        // No tool output
        let mut output = String::new();
        for text in &tokens[14..14 + 7] {
            let events = chat_state.push(StreamResponse {
                generated_text: None,
                token: Token {
                    id: 42,
                    text: text.to_string(),
                    logprob: 0.0,
                    special: false,
                },
                top_tokens: vec![],
                index: 0,
                details: None,
            });
            assert_eq!(events.len(), 1);
            match &events[0] {
                CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                    assert_eq!(choices.len(), 1);
                    if let ChatCompletionChoice {
                        delta: ChatCompletionDelta::Chat(TextMessage { content, .. }),
                        ..
                    } = &choices[0]
                    {
                        output.push_str(content);
                    } else {
                        panic!("Expected plain message");
                    }
                }
                _ => panic!("Unexpected chunk"),
            }
        }
        assert_eq!(output, "I am a helpful assistant!");
        // No tool finish: the closing braces produce no events.
        for text in &tokens[14 + 7..] {
            let events = chat_state.push(StreamResponse {
                generated_text: None,
                token: Token {
                    id: 42,
                    text: text.to_string(),
                    logprob: 0.0,
                    special: false,
                },
                top_tokens: vec![],
                index: 0,
                details: None,
            });
            assert_eq!(events.len(), 0);
        }
    }

    /// A real tool call: the name is emitted first, then argument deltas
    /// that reassemble into a bare JSON arguments object (outer grammar
    /// wrapper stripped).
    #[test]
    fn test_chat_stream_tool_get_weather() {
        let mut chat_state = ChatState::new(
            true,
            StreamOptions {
                include_usage: true,
            },
            "fingerprint".to_string(),
            "model_id".to_string(),
            false,
        );
        let tokens = vec![
            "{\"".to_string(),
            "function".to_string(),
            "\":".to_string(),
            " {\"".to_string(),
            "_".to_string(),
            "name".to_string(),
            "\":".to_string(),
            " \"".to_string(),
            "get".to_string(),
            "_current".to_string(),
            "_weather".to_string(),
            "\",".to_string(),
            // Event 1 is the function name
            // Event 2 is the start of the arguments "{"
            " \"".to_string(),        // Event 3
            "location".to_string(),   // Event 4
            "\":".to_string(),        // Event 5
            " \"".to_string(),        // Event 6
            "San".to_string(),        // Event 7
            " Francisco".to_string(), // Event 8
            ",".to_string(),          // Event 9
            " CA".to_string(),        // Event 10
            "\",".to_string(),        // Event 11
            " \"".to_string(),        // Event 12
            "format".to_string(),     // Event 13
            "\":".to_string(),        // Event 14
            " \"".to_string(),        // Event 15
            "c".to_string(),          // Event 16
            "elsius".to_string(),     // Event 17
            "\"}}".to_string(),       // Event 18 retained (trailing brace removed)
        ];
        // Initial ignored output
        for text in &tokens[..11] {
            let events = chat_state.push(StreamResponse {
                generated_text: None,
                token: Token {
                    id: 42,
                    text: text.to_string(),
                    logprob: 0.0,
                    special: false,
                },
                top_tokens: vec![],
                index: 0,
                details: None,
            });
            assert_eq!(events.len(), 0, "{events:?}");
        }
        // No tool output
        let mut output = String::new();
        let mut output_name = String::new();
        for text in &tokens[11..11 + 17] {
            let events = chat_state.push(StreamResponse {
                generated_text: None,
                token: Token {
                    id: 42,
                    text: text.to_string(),
                    logprob: 0.0,
                    special: false,
                },
                top_tokens: vec![],
                index: 0,
                details: None,
            });
            assert_eq!(events.len(), 1);
            match &events[0] {
                CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                    assert_eq!(choices.len(), 1);
                    if let ChatCompletionChoice {
                        delta: ChatCompletionDelta::Tool(ToolCallDelta { tool_calls, .. }),
                        ..
                    } = &choices[0]
                    {
                        assert_eq!(tool_calls.len(), 1);
                        let DeltaToolCall {
                            index,
                            id,
                            r#type,
                            function,
                        } = &tool_calls[0];
                        assert_eq!(*index, 0);
                        assert_eq!(id, "");
                        assert_eq!(r#type, "function");
                        if let Some(name) = &function.name {
                            assert_eq!(name, "get_current_weather");
                            output_name.push_str(&name);
                        }
                        output.push_str(&function.arguments);
                    } else {
                        panic!("Expected plain message");
                    }
                }
                _ => panic!("Unexpected chunk"),
            }
        }
        assert_eq!(output_name, "get_current_weather");
        assert_eq!(
            output,
            "{ \"location\": \"San Francisco, CA\", \"format\": \"celsius\"}"
        );
        // No tool finish: remaining closing tokens produce no events.
        for text in &tokens[11 + 17..] {
            let events = chat_state.push(StreamResponse {
                generated_text: None,
                token: Token {
                    id: 42,
                    text: text.to_string(),
                    logprob: 0.0,
                    special: false,
                },
                top_tokens: vec![],
                index: 0,
                details: None,
            });
            assert_eq!(events.len(), 0);
        }
    }
}

View File

@ -8,6 +8,7 @@ pub mod validation;
mod kserve;
pub mod logging;
mod chat;
mod sagemaker;
pub mod usage_stats;
mod vertex;
@ -541,6 +542,7 @@ pub(crate) struct Chunk {
}
#[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug))]
pub(crate) struct ChatCompletion {
pub id: String,
#[schema(example = "1706270835")]
@ -553,6 +555,7 @@ pub(crate) struct ChatCompletion {
}
#[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug))]
pub(crate) struct ChatCompletionComplete {
pub index: u32,
pub message: OutputMessage,
@ -561,6 +564,7 @@ pub(crate) struct ChatCompletionComplete {
}
#[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionLogprobs {
content: Vec<ChatCompletionLogprob>,
}
@ -619,6 +623,7 @@ impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
}
#[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionLogprob {
token: String,
logprob: f32,
@ -626,12 +631,14 @@ pub(crate) struct ChatCompletionLogprob {
}
#[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionTopLogprob {
token: String,
logprob: f32,
}
#[derive(Clone, Deserialize, Serialize, ToSchema, Default)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct Usage {
pub prompt_tokens: u32,
pub completion_tokens: u32,
@ -640,6 +647,7 @@ pub(crate) struct Usage {
#[derive(Clone, Serialize, ToSchema)]
#[serde(tag = "object")]
#[cfg_attr(test, derive(Debug))]
enum CompletionType {
#[serde(rename = "chat.completion.chunk")]
ChatCompletionChunk(ChatCompletionChunk),
@ -707,6 +715,7 @@ impl ChatCompletion {
}
}
#[derive(Clone, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug))]
pub(crate) struct ChatCompletionChunk {
pub id: String,
#[schema(example = "1706270978")]
@ -719,6 +728,7 @@ pub(crate) struct ChatCompletionChunk {
}
#[derive(Clone, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionChoice {
pub index: u32,
pub delta: ChatCompletionDelta,
@ -735,6 +745,7 @@ pub struct ToolCallDelta {
#[derive(Clone, Debug, Serialize, ToSchema)]
#[serde(untagged)]
#[cfg_attr(test, derive(PartialEq))]
enum ChatCompletionDelta {
Chat(TextMessage),
Tool(ToolCallDelta),
@ -759,48 +770,17 @@ impl ChatCompletionChunk {
pub(crate) fn new(
model: String,
system_fingerprint: String,
delta: Option<String>,
tool_calls: Option<Vec<String>>,
created: u64,
logprobs: Option<ChatCompletionLogprobs>,
finish_reason: Option<String>,
choices: Vec<ChatCompletionChoice>,
usage: Option<Usage>,
) -> Self {
let delta = match (delta, tool_calls) {
(Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
role: "assistant".to_string(),
content: delta,
..Default::default()
}),
(None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta {
role: "assistant".to_string(),
tool_calls: vec![DeltaToolCall {
index: 0,
id: String::new(),
r#type: "function".to_string(),
function: Function {
name: None,
arguments: tool_calls[0].to_string(),
},
}],
}),
(None, None) => ChatCompletionDelta::Chat(TextMessage {
role: "assistant".to_string(),
content: "".to_string(),
..Default::default()
}),
};
Self {
id: String::new(),
created,
model,
system_fingerprint,
choices: vec![ChatCompletionChoice {
index: 0,
delta,
logprobs,
finish_reason,
}],
usage: None,
choices,
usage,
}
}
}
@ -915,7 +895,7 @@ pub(crate) struct ChatRequest {
/// Options for streaming response. Only set this when you set stream: true.
#[serde(default)]
#[schema(nullable = true, example = "null")]
pub stream_options: Option<StreamOptions>,
pub stream_options: StreamOptions,
}
impl ChatRequest {
@ -1017,11 +997,12 @@ impl ChatRequest {
}
}
#[derive(Clone, Deserialize, ToSchema, Serialize)]
#[derive(Clone, Deserialize, ToSchema, Serialize, Default)]
#[cfg_attr(test, derive(Debug, PartialEq))]
struct StreamOptions {
/// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
#[schema(example = "true")]
#[serde(default)]
include_usage: bool,
}
@ -1445,7 +1426,7 @@ pub(crate) struct ChatTokenizeResponse {
#[serde(transparent)]
pub(crate) struct TokenizeResponse(Vec<SimpleToken>);
#[derive(Serialize, ToSchema)]
#[derive(Serialize, ToSchema, Clone)]
pub(crate) struct StreamDetails {
#[schema(example = "length")]
pub finish_reason: FinishReason,
@ -1457,7 +1438,7 @@ pub(crate) struct StreamDetails {
pub input_length: u32,
}
#[derive(Serialize, ToSchema)]
#[derive(Serialize, ToSchema, Clone)]
pub(crate) struct StreamResponse {
pub index: u32,
pub token: Token,
@ -1700,9 +1681,25 @@ mod tests {
assert!(matches!(
request.stream_options,
Some(StreamOptions {
StreamOptions {
include_usage: true
})
}
));
let json = json!({
"model": "",
"messages": [{
"role": "user",
"content": "Hello"
}]
});
let request: ChatRequest = serde_json::from_str(json.to_string().as_str()).unwrap();
assert!(matches!(
request.stream_options,
StreamOptions {
include_usage: false
}
));
}

View File

@ -1,3 +1,4 @@
use crate::chat::ChatState;
/// HTTP Server logic
use crate::config::Config;
use crate::infer::{Backend, Infer, InferError, InferResponse, InferStreamResponse};
@ -47,8 +48,6 @@ use http::header::AUTHORIZATION;
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
use pyo3::prelude::*;
use pyo3::types::IntoPyDict;
use regex::Regex;
use serde_json::Value;
use std::convert::Infallible;
use std::fs::File;
use std::io::BufReader;
@ -1114,62 +1113,6 @@ pub(crate) async fn completions(
}
}
enum StreamState {
Buffering,
BufferTrailing,
Content { skip_close_quote: bool },
}
/// Convert a StreamResponse into an Event to be sent over SSE
fn create_event_from_stream_token(
stream_token: &StreamResponse,
logprobs: bool,
inner_using_tools: bool,
system_fingerprint: String,
model_id: String,
) -> Event {
let event = Event::default();
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let logprobs = logprobs.then(|| {
ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens.clone()))
});
// replace the content with the tool calls if grammar is present
let (content, tool_calls) = if inner_using_tools {
(None, Some(vec![stream_token.token.text.clone()]))
} else {
let content = if !stream_token.token.special {
Some(stream_token.token.text.clone())
} else {
None
};
(content, None)
};
let finish_reason = stream_token
.details
.as_ref()
.map(|details| details.finish_reason.format(true));
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
model_id.clone(),
system_fingerprint.clone(),
content,
tool_calls,
current_time,
logprobs,
finish_reason,
));
event.json_data(chat_complete).unwrap_or_else(|e| {
println!("Failed to serialize ChatCompletionChunk: {:?}", e);
Event::default()
})
}
/// Generate tokens
#[utoipa::path(
post,
@ -1235,165 +1178,19 @@ pub(crate) async fn chat_completions(
let (headers, response_stream) =
generate_stream_internal(infer, compute_type, Json(generate_request), span).await;
// regex to match any function name
let function_regex = match Regex::new(r#"\{"function":\{"_name":"([^"]+)""#) {
Ok(regex) => regex,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to compile regex: {}", e),
error_type: "regex".to_string(),
}),
))
}
};
let response_stream = async_stream::stream! {
let mut response_stream = Box::pin(response_stream);
let mut buffer = Vec::new();
let mut json_buffer = String::new();
let mut state = if using_tools {
StreamState::Buffering
} else {
StreamState::Content {
skip_close_quote: false,
}
};
let mut response_as_tool = using_tools;
let mut state = ChatState::new(using_tools, stream_options, system_fingerprint, model_id, logprobs);
while let Some(result) = response_stream.next().await {
match result{
Ok(stream_token) => {
let token_text = &stream_token.token.text.clone();
let usage = stream_token.details.as_ref().map(|details| {
let completion_tokens = details.generated_tokens;
let prompt_tokens = details.input_length;
let total_tokens = prompt_tokens + completion_tokens;
Usage {
completion_tokens,
prompt_tokens,
total_tokens,
}
});
match state {
StreamState::Buffering => {
json_buffer.push_str(&token_text.replace(" ", ""));
buffer.push(stream_token);
if let Some(captures) = function_regex.captures(&json_buffer) {
let function_name = captures[1].to_string();
if function_name == "no_tool" {
state = StreamState::BufferTrailing;
response_as_tool = false;
buffer.clear();
json_buffer.clear();
} else {
state = StreamState::Content {
skip_close_quote: false,
};
// send all the buffered messages
for stream_token in &buffer {
let event = create_event_from_stream_token(
stream_token,
logprobs,
response_as_tool,
system_fingerprint.clone(),
model_id.clone(),
);
yield Ok::<Event, Infallible>(event);
}
}
}
}
// if we skipped sending the buffer we need to avoid sending the following json key and quotes
StreamState::BufferTrailing => {
let infix_text = "\"content\":\"";
json_buffer.push_str(&token_text.replace(" ", ""));
// keep capturing until we find the infix text
match json_buffer.find(infix_text) {
Some(content_key_index) => {
json_buffer =
json_buffer[content_key_index + infix_text.len()..].to_string();
}
None => {
continue;
}
}
// if there is leftover text after removing the infix text, we need to send it
if !json_buffer.is_empty() {
let event = Event::default();
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let chat_complete =
CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
model_id.clone(),
system_fingerprint.clone(),
Some(json_buffer.clone()),
None,
current_time,
None,
None,
));
yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| {
InferError::StreamSerializationError(e.to_string()).into()
let events = state.push(stream_token);
for chat_complete in events{
yield Ok(Event::default().json_data(chat_complete).unwrap_or_else(|e| {
tracing::error!("Failed to serialize ChatCompletionChunk: {:?}", e);
Event::default()
}));
}
// cleanup the buffers
buffer.clear();
json_buffer.clear();
state = StreamState::Content {
skip_close_quote: true,
};
}
StreamState::Content { skip_close_quote } => {
if skip_close_quote && token_text.contains('"') {
break;
}
// send the content
let event = create_event_from_stream_token(
&stream_token,
logprobs,
response_as_tool,
system_fingerprint.clone(),
model_id.clone(),
);
yield Ok::<Event, Infallible>(event);
}
}
let should_send_usage = usage.is_some()
&& stream_options
.as_ref()
.is_some_and(|opts| opts.include_usage);
if should_send_usage {
let usage_data = usage.unwrap();
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
id: String::new(),
created: current_time,
model: model_id.clone(),
system_fingerprint: system_fingerprint.clone(),
choices: vec![],
usage: Some(Usage {
prompt_tokens: usage_data.prompt_tokens,
completion_tokens: usage_data.completion_tokens,
total_tokens: usage_data.total_tokens,
}),
});
yield Ok(Event::default()
.json_data(chat_complete)
.unwrap_or_else(|e| InferError::StreamSerializationError(e.to_string()).into()));
}
}
Err(err) => yield Ok(err.into_openai_event())
}
@ -1413,56 +1210,7 @@ pub(crate) async fn chat_completions(
.as_secs();
let (tool_calls, output) = if using_tools {
let gen_text_value: Value =
serde_json::from_str(&generation.generated_text).map_err(|e| {
InferError::ToolError(format!(
"Failed to parse generated text: {} {:?}",
e, generation.generated_text
))
})?;
let function = gen_text_value.get("function").ok_or(InferError::ToolError(
"No function found in generated text".to_string(),
))?;
let name = function
.get("_name")
.and_then(Value::as_str)
.ok_or(InferError::ToolError(
"No _name found in generated text".to_string(),
))?
.to_string();
let mut arguments = function.clone();
if let Value::Object(ref mut props) = arguments {
props.remove("_name");
}
match name.as_str() {
"no_tool" => {
// parse the content message
let content_message = arguments
.get("content")
.and_then(Value::as_str)
.ok_or_else(|| {
InferError::ToolError(
"No `content` found in generated text".to_string(),
)
})?
.to_string();
(None, Some(content_message))
}
_ => {
let tool_calls = vec![ToolCall {
id: "0".to_string(),
r#type: "function".to_string(),
function: FunctionDefinition {
description: None,
name,
arguments,
},
}];
(Some(tool_calls), None)
}
}
crate::chat::parse_output(&generation.generated_text)?
} else {
(None, Some(generation.generated_text))
};