Fix tool call3 (#3086)

* Fixing the tool calling convention.

* Update the doc.

* Fixing some corner cases.

* Fixing the tool call id.

* Fmt.

* Snapshot update with the new updated tool_call_id.

* More qwen2.
This commit is contained in:
Nicolas Patry 2025-03-12 09:22:53 +01:00 committed by GitHub
parent ae4451c3da
commit f74c36fe0d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1364 additions and 2481 deletions

View File

@ -29,7 +29,7 @@ homepage = "https://github.com/huggingface/text-generation-inference"
[workspace.dependencies] [workspace.dependencies]
base64 = "0.22.0" base64 = "0.22.0"
tokenizers = { version = "0.20.0", features = ["http"] } tokenizers = { version = "0.20.0", features = ["http"] }
hf-hub = { version = "0.4.1", features = ["tokio"] } hf-hub = { version = "0.4.2", features = ["tokio"] }
metrics = { version = "0.23.0" } metrics = { version = "0.23.0" }
metrics-exporter-prometheus = { version = "0.15.1", features = [] } metrics-exporter-prometheus = { version = "0.15.1", features = [] }
minijinja = { version = "2.2.0", features = ["json"] } minijinja = { version = "2.2.0", features = ["json"] }

View File

@ -2148,9 +2148,6 @@
}, },
"StreamOptions": { "StreamOptions": {
"type": "object", "type": "object",
"required": [
"include_usage"
],
"properties": { "properties": {
"include_usage": { "include_usage": {
"type": "boolean", "type": "boolean",

View File

@ -10,7 +10,7 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", "arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
"description": null, "description": null,
"name": "get_current_weather" "name": "get_current_weather"
}, },
@ -21,7 +21,7 @@
} }
} }
], ],
"created": 1741263682, "created": 1741372434,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion", "object": "chat.completion",

View File

@ -10,7 +10,7 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", "arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
"description": null, "description": null,
"name": "get_current_weather" "name": "get_current_weather"
}, },
@ -21,7 +21,7 @@
} }
} }
], ],
"created": 1741263684, "created": 1741372657,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion", "object": "chat.completion",

View File

@ -8,10 +8,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "{\"", "arguments": "{",
"name": null "name": "get_current_weather"
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -22,187 +22,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "function",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " {\"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "name",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -221,7 +41,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -232,157 +52,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "get",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_current",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_weather",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\",",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -401,7 +71,7 @@
"arguments": "location", "arguments": "location",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -412,7 +82,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -431,7 +101,7 @@
"arguments": "\":", "arguments": "\":",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -442,7 +112,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -461,7 +131,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -472,7 +142,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -488,10 +158,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "Paris", "arguments": "Bro",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -502,7 +172,37 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "oklyn",
"name": null
},
"id": "0",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -521,7 +221,7 @@
"arguments": ",", "arguments": ",",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -532,7 +232,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -548,10 +248,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": " France", "arguments": " NY",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -562,7 +262,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -581,7 +281,7 @@
"arguments": "\",", "arguments": "\",",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -592,7 +292,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -611,7 +311,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -622,7 +322,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -641,7 +341,7 @@
"arguments": "format", "arguments": "format",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -652,7 +352,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -671,7 +371,7 @@
"arguments": "\":", "arguments": "\":",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -682,7 +382,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -701,7 +401,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -712,7 +412,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -728,10 +428,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "c", "arguments": "f",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -742,7 +442,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -758,10 +458,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "elsius", "arguments": "ahrenheit",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -772,7 +472,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -788,10 +488,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "\"}}", "arguments": "\"}",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -802,37 +502,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263685, "created": 1741688515,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "<|eot_id|>",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741263685,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",

View File

@ -1,4 +1,24 @@
[ [
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{ {
"choices": [ "choices": [
{ {
@ -12,7 +32,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263687, "created": 1741364571,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -32,7 +52,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263687, "created": 1741364571,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -52,7 +72,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263687, "created": 1741364571,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -72,7 +92,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263687, "created": 1741364571,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -92,7 +112,27 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263687, "created": 1741364571,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "!",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741364571,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",

View File

@ -10,7 +10,7 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", "arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
"description": null, "description": null,
"name": "get_current_weather" "name": "get_current_weather"
}, },
@ -21,7 +21,7 @@
} }
} }
], ],
"created": 1741263680, "created": 1741372335,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion", "object": "chat.completion",

View File

@ -10,10 +10,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "{\"", "arguments": "{",
"name": null "name": "get_current_weather"
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -24,205 +24,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "function",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " {\"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "name",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -244,7 +46,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -255,172 +57,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "get",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_current",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_weather",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\",",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -442,7 +79,7 @@
"arguments": "location", "arguments": "location",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -453,7 +90,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -475,7 +112,7 @@
"arguments": "\":", "arguments": "\":",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -486,7 +123,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -508,7 +145,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -519,7 +156,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -541,7 +178,7 @@
"arguments": "Bro", "arguments": "Bro",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -552,7 +189,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -574,7 +211,7 @@
"arguments": "oklyn", "arguments": "oklyn",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -585,7 +222,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -607,7 +244,7 @@
"arguments": ",", "arguments": ",",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -618,7 +255,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -640,7 +277,7 @@
"arguments": " NY", "arguments": " NY",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -651,7 +288,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -673,7 +310,7 @@
"arguments": "\",", "arguments": "\",",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -684,7 +321,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -706,7 +343,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -717,7 +354,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -739,7 +376,7 @@
"arguments": "format", "arguments": "format",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -750,7 +387,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -772,7 +409,7 @@
"arguments": "\":", "arguments": "\":",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -783,7 +420,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -805,7 +442,7 @@
"arguments": " \"", "arguments": " \"",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -816,7 +453,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -838,7 +475,7 @@
"arguments": "f", "arguments": "f",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -849,7 +486,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -871,7 +508,7 @@
"arguments": "ahrenheit", "arguments": "ahrenheit",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -882,7 +519,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -901,10 +538,10 @@
"tool_calls": [ "tool_calls": [
{ {
"function": { "function": {
"arguments": "\"}}", "arguments": "\"}",
"name": null "name": null
}, },
"id": "", "id": "0",
"index": 0, "index": 0,
"type": "function" "type": "function"
} }
@ -915,40 +552,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263681, "created": 1741689423,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "<|eot_id|>",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741263681,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",

View File

@ -1,4 +1,24 @@
[ [
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741371722,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{ {
"choices": [ "choices": [
{ {
@ -12,7 +32,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -32,7 +52,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -52,7 +72,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -72,7 +92,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -92,7 +112,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -112,7 +132,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -132,7 +152,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -152,7 +172,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -172,7 +192,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -192,7 +212,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -212,7 +232,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -232,7 +252,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -252,7 +272,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -272,7 +292,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -292,7 +312,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -312,7 +332,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -332,7 +352,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371722,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -352,7 +372,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -372,7 +392,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -392,7 +412,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -412,7 +432,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -432,7 +452,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -452,7 +472,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -472,7 +492,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -492,7 +512,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263688, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -512,7 +532,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -532,7 +552,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -552,7 +572,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -572,7 +592,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -592,7 +612,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -612,7 +632,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -632,7 +652,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -652,7 +672,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -672,7 +692,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -692,7 +712,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -712,7 +732,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -732,7 +752,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -752,7 +772,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -772,7 +792,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -792,7 +812,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -812,7 +832,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -832,7 +852,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -852,7 +872,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -872,7 +892,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -892,7 +912,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -912,7 +932,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -932,7 +952,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371723,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -952,7 +972,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -972,7 +992,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -992,7 +1012,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1012,7 +1032,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1032,7 +1052,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1052,7 +1072,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1072,7 +1092,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263689, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1092,7 +1112,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1112,7 +1132,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1132,7 +1152,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1152,7 +1172,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1172,7 +1192,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1192,7 +1212,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1212,7 +1232,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1232,7 +1252,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1252,7 +1272,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1272,7 +1292,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1292,7 +1312,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1312,7 +1332,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1332,7 +1352,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1352,7 +1372,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1372,7 +1392,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1392,7 +1412,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1412,7 +1432,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1432,7 +1452,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1452,7 +1472,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1472,7 +1492,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1492,7 +1512,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1512,7 +1532,7 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -1532,7 +1552,27 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1741263690, "created": 1741371724,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": ".",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741371725,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",

View File

@ -108,7 +108,7 @@ async def test_flash_llama_grammar_tools_nostream(
function=ChatCompletionOutputFunctionDefinition( function=ChatCompletionOutputFunctionDefinition(
description=None, description=None,
name="get_current_weather", name="get_current_weather",
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
), ),
) )
] ]
@ -142,14 +142,15 @@ async def test_flash_llama_grammar_tools_openai(
chunks = [] chunks = []
tool = "" tool = ""
name = ""
for chunk in stream: for chunk in stream:
if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
tool += chunk.choices[0].delta.tool_calls[0].function.arguments tool += chunk.choices[0].delta.tool_calls[0].function.arguments
chunks.append(chunk) chunks.append(chunk)
assert ( assert name == "get_current_weather"
tool assert tool == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}'
== '{"function": {"_name": "get_current_weather", "location": "Brooklyn, NY", "format": "fahrenheit"}}<|eot_id|>'
)
assert chunks == response_snapshot assert chunks == response_snapshot
@ -184,7 +185,7 @@ async def test_flash_llama_grammar_tools_auto_nostream(
function=ChatCompletionOutputFunctionDefinition( function=ChatCompletionOutputFunctionDefinition(
description=None, description=None,
name="get_current_weather", name="get_current_weather",
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
), ),
) )
] ]
@ -223,7 +224,7 @@ async def test_flash_llama_grammar_tools_choice_nostream(
function=ChatCompletionOutputFunctionDefinition( function=ChatCompletionOutputFunctionDefinition(
description=None, description=None,
name="get_current_weather", name="get_current_weather",
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
), ),
) )
] ]
@ -250,23 +251,24 @@ async def test_flash_llama_grammar_tools_choice_stream(
}, },
{ {
"role": "user", "role": "user",
"content": "What is the weather like in Paris, France?", "content": "What is the weather like in Brooklyn, New York?",
}, },
], ],
stream=True, stream=True,
) )
tool_calls_generated = "" arguments = ""
chunks = [] chunks = []
name = ""
for chunk in stream: for chunk in stream:
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
arguments += chunk.choices[0].delta.tool_calls[0].function.arguments
assert chunk.choices[0].delta.content is None assert chunk.choices[0].delta.content is None
chunks.append(chunk) chunks.append(chunk)
assert ( assert name == "get_current_weather"
tool_calls_generated assert arguments == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}'
== '{"function": {"_name": "get_current_weather", "location": "Paris, France", "format": "celsius"}}<|eot_id|>'
)
assert chunks == response_snapshot assert chunks == response_snapshot
@ -297,8 +299,6 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream(
content_generated = response.choices[0].message.content content_generated = response.choices[0].message.content
assert response.choices[0].message.tool_calls is None assert response.choices[0].message.tool_calls is None
######## FIXME before MERGE ############################
# TODO This is different from the streaming case, this is NOT normal.
assert content_generated == "I am a helpful assistant!" assert content_generated == "I am a helpful assistant!"
assert response == response_snapshot assert response == response_snapshot
@ -334,7 +334,8 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream(
chunks.append(chunk) chunks.append(chunk)
assert chunk.choices[0].delta.tool_calls is None assert chunk.choices[0].delta.tool_calls is None
assert content_generated == "I am a helpful assistant" ######## This is exactly the same as the non streaming case
assert content_generated == "I am a helpful assistant!"
assert chunks == response_snapshot assert chunks == response_snapshot
@ -371,7 +372,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto(
assert ( assert (
content_generated content_generated
== "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle" == "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle."
) )
assert chunks == response_snapshot assert chunks == response_snapshot
@ -401,14 +402,18 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
) )
tool_calls_generated = "" tool_calls_generated = ""
name = ""
chunks = [] chunks = []
for chunk in stream: for chunk in stream:
assert chunk.choices[0].delta.content is None assert chunk.choices[0].delta.content is None
if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
assert name == "get_n_day_weather_forecast"
assert ( assert (
tool_calls_generated tool_calls_generated
== '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}}<|eot_id|>' == '{ "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}'
) )
assert chunks == response_snapshot assert chunks == response_snapshot
@ -479,12 +484,17 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
) )
chunks = [] chunks = []
tool_calls_generated = "" tool_calls_generated = ""
name = ""
for chunk in stream: for chunk in stream:
assert chunk.choices[0].delta.content is None
if chunk.choices[0].delta.tool_calls[0].function.name:
name += chunk.choices[0].delta.tool_calls[0].function.name
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
chunks.append(chunk)
assert name == "get_n_day_weather_forecast"
assert ( assert (
tool_calls_generated tool_calls_generated
== '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days": 3}}<|eot_id|>' == '{ "location": "San Francisco, CA", "format": "celsius", "num_days": 3}'
) )
assert chunks == response_snapshot assert chunks == response_snapshot

978
router/src/chat.rs Normal file
View File

@ -0,0 +1,978 @@
use crate::{
infer::InferError, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionDelta,
ChatCompletionLogprobs, CompletionType, DeltaToolCall, Function, FunctionDefinition,
StreamOptions, StreamResponse, TextMessage, ToolCallDelta, Usage,
};
use serde::Deserialize;
use serde_json::Value;
/// Marker for the `_name` field of a "no tool" reply.
///
/// Because of `rename_all = "snake_case"`, the single variant deserializes from
/// the string `"no_tool"`; any other name fails to deserialize into this type.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum _NoTool {
NoTool,
}
/// Inner object of a "no tool" reply: the `no_tool` marker under `_name`
/// together with the `content` string.
#[derive(Debug, Deserialize)]
struct NoToolCall {
// Only deserializes when the name is exactly `"no_tool"` (see `_NoTool`).
_name: _NoTool,
// String stored under the `content` key of the reply.
content: String,
}
/// Top-level shape of a "no tool" reply:
/// `{"function": {"_name": "no_tool", "content": "..."}}`.
#[derive(Debug, Deserialize)]
struct NoTool {
function: NoToolCall,
}
/// Inner object of a generated call: the function name under `_name`, with the
/// remaining keys of the object gathered into `arguments` via `serde(flatten)`.
#[derive(Debug, Deserialize)]
struct ToolCall {
// Name of the function chosen by the model (`"no_tool"` means a plain reply).
_name: String,
#[serde(flatten, default)]
/// Every key other than `_name`, kept as a JSON map.
/// A `Map` is used so that key order can be preserved.
/// NOTE(review): insertion order is only kept when serde_json is built with
/// its `preserve_order` feature — confirm it is enabled for this crate.
arguments: serde_json::Map<String, Value>,
}
/// Top-level shape of a generated call: `{"function": { "_name": ..., ... }}`.
#[derive(Debug, Deserialize)]
struct Call {
function: ToolCall,
}
pub(crate) fn parse_output(
generated_text: &str,
) -> Result<(Option<Vec<crate::ToolCall>>, Option<String>), InferError> {
let call: Call = serde_json::from_str(generated_text).map_err(|e| {
InferError::ToolError(format!(
"Failed to parse generated text: {} {:?}",
e, generated_text
))
})?;
let name = call.function._name;
match &name[..] {
"no_tool" => {
// parse the content message
let content_message = call
.function
.arguments
.get("content")
.and_then(Value::as_str)
.ok_or_else(|| {
InferError::ToolError("No `content` found in generated text".to_string())
})?
.to_string();
Ok((None, Some(content_message)))
}
name => {
let tool_calls = vec![crate::ToolCall {
id: "0".to_string(),
r#type: "function".to_string(),
function: FunctionDefinition {
description: None,
name: name.to_string(),
arguments: serde_json::to_value(call.function.arguments).map_err(|err| {
InferError::ToolError(format!(
"Could not convert arguments to JSON map {err}"
))
})?,
},
}];
Ok((Some(tool_calls), None))
}
}
}
/// Build the chat-completion chunk corresponding to one streamed token.
///
/// - `logprobs`: when true, the token's logprobs (and top tokens) are attached.
/// - `inner_using_tools`: when true, the token text is emitted as a tool-call
///   `arguments` delta (carrying `function_name` and `id`); otherwise it is
///   emitted as assistant `content`.
///
/// Special tokens never contribute text: they always yield an assistant
/// message with empty content. The finish reason is copied from the token's
/// details when present.
fn create_event_from_stream_token(
    stream_token: &StreamResponse,
    logprobs: bool,
    inner_using_tools: bool,
    system_fingerprint: String,
    model_id: String,
    function_name: Option<String>,
    id: String,
) -> CompletionType {
    let created = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_else(|_| std::time::Duration::from_secs(0))
        .as_secs();

    let logprobs = if logprobs {
        Some(ChatCompletionLogprobs::from((
            stream_token.token.clone(),
            stream_token.top_tokens.clone(),
        )))
    } else {
        None
    };

    let finish_reason = match stream_token.details.as_ref() {
        Some(details) => Some(details.finish_reason.format(true)),
        None => None,
    };

    // Text carried by this token, if it is allowed to surface at all.
    let visible_text = (!stream_token.token.special).then(|| stream_token.token.text.clone());

    let delta = match visible_text {
        // Tool mode: the text is a fragment of the call's JSON arguments.
        Some(arguments) if inner_using_tools => ChatCompletionDelta::Tool(ToolCallDelta {
            role: "assistant".to_string(),
            tool_calls: vec![DeltaToolCall {
                index: 0,
                id,
                r#type: "function".to_string(),
                function: Function {
                    name: function_name,
                    arguments,
                },
            }],
        }),
        // Regular chat content.
        Some(content) => ChatCompletionDelta::Chat(TextMessage {
            role: "assistant".to_string(),
            content,
            ..Default::default()
        }),
        // Special token: nothing to show, emit an empty assistant message.
        None => ChatCompletionDelta::Chat(TextMessage {
            role: "assistant".to_string(),
            content: "".to_string(),
            ..Default::default()
        }),
    };

    let choice = ChatCompletionChoice {
        index: 0,
        delta,
        logprobs,
        finish_reason,
    };
    CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
        model_id,
        system_fingerprint,
        created,
        vec![choice],
        None,
    ))
}
/// Phase of the streaming state machine driven by `ChatState::push`.
#[derive(Debug)]
enum StreamState {
/// Tools are enabled and the generated JSON prefix is still being
/// accumulated: neither a function name nor a `no_tool` content string has
/// been recognised yet. Also re-entered once a tool call has fully parsed.
Buffering,
/// A tool call was detected; tokens are forwarded as argument deltas.
Tool,
/// Inside the `content` string of a `no_tool` reply; tokens are forwarded
/// as plain assistant text.
NoTool,
/// The `no_tool` content string has been closed; remaining tokens are dropped.
NoToolFinish,
/// Tools are not in use; every token is forwarded as plain content.
Content,
}
/// Incremental converter from backend stream tokens to chat-completion chunks.
///
/// Feed each `StreamResponse` to `push`; it returns the chunks (possibly none)
/// to forward to the client.
pub struct ChatState {
// Current phase of the tool / no-tool detection state machine.
state: StreamState,
// Generated text accumulated so far (only filled while tools are in use).
text: String,
// Streaming options; `include_usage` controls the trailing usage chunk.
options: StreamOptions,
// Model identifier copied into every emitted chunk.
model_id: String,
// System fingerprint copied into every emitted chunk.
fingerprint: String,
// Whether logprobs are attached to emitted chunks.
logprobs: bool,
// Tool call id attached to tool-call deltas.
id: String,
}
impl ChatState {
pub fn new(
using_tools: bool,
options: StreamOptions,
fingerprint: String,
model_id: String,
logprobs: bool,
id: String,
) -> Self {
let state = if using_tools {
StreamState::Buffering
} else {
StreamState::Content
};
let text = String::new();
Self {
state,
text,
options,
fingerprint,
model_id,
logprobs,
id,
}
}
pub fn push(&mut self, mut stream_token: StreamResponse) -> Vec<CompletionType> {
let mut events = vec![];
let token_text = &stream_token.token.text;
match self.state {
StreamState::Buffering => {
self.text.push_str(token_text);
// We have a special match for `no_tool` in order to capture directly the `content`
// key which should be re-emitted as raw text.
if let Ok(value) = serde_json::from_str::<NoTool>(&format!("{}\"}}}}", self.text)) {
self.state = StreamState::NoTool;
// Modifiy the content of the token to be whatever was captured by the JSON
stream_token.token.text = value.function.content;
let chat_complete = create_event_from_stream_token(
&stream_token,
self.logprobs,
false,
self.fingerprint.clone(),
self.model_id.clone(),
None,
self.id.clone(),
);
events.push(chat_complete);
}
// XXX Caution, here we do not postfix the quote, so that the current output
// Is necessarily finished with quotes for us to be able to parse.
let partial = &self.text;
let partial = partial.trim_end_matches(|c: char| c.is_whitespace() || c == ',');
if let Ok(call) = serde_json::from_str::<Call>(&format!("{}}}}}", partial)) {
// This can be no_tool before the content has been emitted
if call.function._name != "no_tool" {
stream_token.token.text = "{".to_string();
let chat_complete = create_event_from_stream_token(
&stream_token,
self.logprobs,
true,
self.fingerprint.clone(),
self.model_id.clone(),
Some(call.function._name),
self.id.clone(),
);
events.push(chat_complete);
self.state = StreamState::Tool;
}
}
}
StreamState::Tool => {
self.text.push_str(token_text);
if serde_json::from_str::<Call>(&self.text).is_ok() {
self.state = StreamState::Buffering;
let mut text = stream_token.token.text.trim_end();
// Effectively trimming only the last closing brace
if text.ends_with('}') {
text = &text[..text.len() - 1];
}
stream_token.token.text = text.to_string();
let chat_complete = create_event_from_stream_token(
&stream_token,
self.logprobs,
true,
self.fingerprint.clone(),
self.model_id.clone(),
None,
self.id.clone(),
);
events.push(chat_complete);
} else {
let chat_complete = create_event_from_stream_token(
&stream_token,
self.logprobs,
true,
self.fingerprint.clone(),
self.model_id.clone(),
None,
self.id.clone(),
);
events.push(chat_complete);
}
}
// if we skipped sending the buffer we need to avoid sending the following json key and quotes
// We have remainder tokens, ignore everying,
StreamState::NoToolFinish => {}
StreamState::NoTool => {
self.text.push_str(token_text);
if token_text.contains("\"") {
let mut text = self
.text
.trim_end_matches(|c: char| c.is_whitespace() || c == '}');
// Trim once
if text.ends_with("\"") {
// Verify we have actually trimmed something
// The opposite can happen if the model is outputting inline JSON.
text = &text[..text.len() - 1];
if let Ok(_value) =
serde_json::from_str::<NoTool>(&format!("{}\"}}}}", text))
{
let mut text = token_text
.trim_end_matches(|c: char| c.is_whitespace() || c == '}');
// Effectively trim_end_match('"', 1)
// because we do not want to eventually trim finishing escaped quotes
// {{"\"Something\""}}
if text.ends_with("\"") {
text = &text[..text.len() - 1];
}
stream_token.token.text = text.to_string();
self.state = StreamState::NoToolFinish;
}
}
}
// This escaping is usually inline json escaping and we can therefore remove it.
stream_token.token.text = stream_token.token.text.replace("\\", "");
let chat_complete = create_event_from_stream_token(
&stream_token,
self.logprobs,
false,
self.fingerprint.clone(),
self.model_id.clone(),
None,
self.id.clone(),
);
events.push(chat_complete);
}
StreamState::Content => {
let chat_complete = create_event_from_stream_token(
&stream_token,
self.logprobs,
false,
self.fingerprint.clone(),
self.model_id.clone(),
None,
self.id.clone(),
);
events.push(chat_complete);
}
}
if self.options.include_usage {
if let Some(details) = stream_token.details {
let completion_tokens = details.generated_tokens;
let prompt_tokens = details.input_length;
let total_tokens = prompt_tokens + completion_tokens;
let usage = Usage {
completion_tokens,
prompt_tokens,
total_tokens,
};
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
id: String::new(),
created: current_time,
model: self.model_id.clone(),
system_fingerprint: self.fingerprint.clone(),
choices: vec![],
usage: Some(Usage {
prompt_tokens: usage.prompt_tokens,
completion_tokens: usage.completion_tokens,
total_tokens: usage.total_tokens,
}),
});
events.push(chat_complete);
}
}
events
}
}
#[cfg(test)]
mod tests {
    use crate::{
        ChatCompletionChoice, ChatCompletionDelta, FinishReason, StreamDetails, TextMessage, Token,
    };

    use super::*;

    /// Chat state with the given tool / usage settings and fixed metadata.
    fn chat_state(using_tools: bool, include_usage: bool) -> ChatState {
        ChatState::new(
            using_tools,
            StreamOptions { include_usage },
            "fingerprint".to_string(),
            "model_id".to_string(),
            false,
            "0".to_string(),
        )
    }

    /// A non-special stream token carrying `text`.
    fn stream_token(text: &str, details: Option<StreamDetails>) -> StreamResponse {
        StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: text.to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details,
        }
    }

    /// One detail-less stream token per entry of `texts`.
    fn stream_tokens(texts: &[&str]) -> Vec<StreamResponse> {
        texts.iter().map(|&text| stream_token(text, None)).collect()
    }

    /// Push every token and require that none of them produces an event.
    fn assert_silent(chat_state: &mut ChatState, tokens: &[StreamResponse]) {
        for token in tokens {
            let events = chat_state.push(token.clone());
            assert_eq!(events.len(), 0, "Extra events {events:?}");
        }
    }

    /// Push every token, require exactly one plain-text event per token and
    /// return the concatenated content.
    fn collect_text(chat_state: &mut ChatState, tokens: &[StreamResponse]) -> String {
        let mut output = String::new();
        for token in tokens {
            let events = chat_state.push(token.clone());
            assert_eq!(events.len(), 1, "Current text is {output:?}");
            output.push_str(get_text_content(&events[0]));
        }
        output
    }

    fn get_text_content(event: &CompletionType) -> &String {
        match event {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                assert_eq!(choices.len(), 1);
                if let ChatCompletionChoice {
                    delta: ChatCompletionDelta::Chat(TextMessage { content, .. }),
                    ..
                } = &choices[0]
                {
                    content
                } else {
                    panic!("Expected plain message");
                }
            }
            _ => panic!("Unexpected chunk"),
        }
    }

    fn get_tool_call_content(event: &CompletionType) -> (Option<&String>, &String) {
        match event {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                assert_eq!(choices.len(), 1);
                if let ChatCompletionChoice {
                    delta: ChatCompletionDelta::Tool(ToolCallDelta { tool_calls, .. }),
                    ..
                } = &choices[0]
                {
                    assert_eq!(tool_calls.len(), 1);
                    let DeltaToolCall {
                        index,
                        id,
                        r#type,
                        function,
                    } = &tool_calls[0];
                    assert_eq!(*index, 0);
                    assert_eq!(id, "0");
                    assert_eq!(r#type, "function");
                    (function.name.as_ref(), &function.arguments)
                } else {
                    panic!("Expected tool call message");
                }
            }
            _ => panic!("Unexpected chunk"),
        }
    }

    #[test]
    fn test_chat_stream() {
        let mut chat_state = chat_state(false, false);

        let events = chat_state.push(stream_token("Hi", None));
        assert_eq!(events.len(), 1);
        match &events[0] {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                assert_eq!(
                    choices,
                    &[ChatCompletionChoice {
                        index: 0,
                        delta: ChatCompletionDelta::Chat(TextMessage {
                            role: "assistant".to_string(),
                            content: "Hi".to_string(),
                            tool_call_id: None,
                        }),
                        logprobs: None,
                        finish_reason: None,
                    }]
                );
            }
            _ => panic!("Unexpected chunk"),
        }
    }

    #[test]
    fn test_chat_stream_usage() {
        let mut chat_state = chat_state(false, true);

        let events = chat_state.push(stream_token(
            "Hi",
            Some(StreamDetails {
                input_length: 2,
                generated_tokens: 10,
                seed: None,
                finish_reason: FinishReason::Length,
            }),
        ));
        assert_eq!(events.len(), 2);
        match &events[0] {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
                assert_eq!(
                    choices,
                    &[ChatCompletionChoice {
                        index: 0,
                        delta: ChatCompletionDelta::Chat(TextMessage {
                            role: "assistant".to_string(),
                            content: "Hi".to_string(),
                            tool_call_id: None,
                        }),
                        logprobs: None,
                        // HAS A FINISH REASON
                        finish_reason: Some("length".to_string()),
                    }]
                );
            }
            _ => panic!("Unexpected chunk"),
        }
        match &events[1] {
            CompletionType::ChatCompletionChunk(ChatCompletionChunk { usage, .. }) => {
                assert_eq!(
                    *usage,
                    Some(Usage {
                        prompt_tokens: 2,
                        completion_tokens: 10,
                        total_tokens: 12,
                    })
                );
            }
            _ => panic!("Unexpected chunk"),
        }
    }

    #[test]
    fn test_chat_stream_tool_no_tool() {
        let mut chat_state = chat_state(true, true);

        let tokens = stream_tokens(&[
            // Tokens 0..14: JSON prefix, buffered without any event
            "{\"",
            "function",
            "\":",
            " {\"",
            "_",
            "name",
            "\":",
            " \"",
            "no",
            "_tool",
            "\",",
            " \"",
            "content",
            "\":",
            // Tokens 14..21: one event each
            " \"", // opens the content string (empty event)
            "I",
            " am",
            " a",
            " helpful",
            " assistant",
            "!\"", // trailing quote removed
            // Closing braces, ignored
            "}",
            "}",
        ]);

        // Initial ignored output
        assert_silent(&mut chat_state, &tokens[..14]);

        // No tool output
        let output = collect_text(&mut chat_state, &tokens[14..14 + 7]);
        assert_eq!(output, "I am a helpful assistant!");

        // No tool finish
        assert_silent(&mut chat_state, &tokens[14 + 7..]);
    }

    #[test]
    fn test_chat_stream_tool_no_tool_many_quotes() {
        let mut chat_state = chat_state(true, true);

        let tokens = stream_tokens(&[
            // Tokens 0..14: JSON prefix, buffered without any event
            "{\"",
            "function",
            "\":",
            " {\"",
            "_",
            "name",
            "\":",
            " \"",
            "no",
            "_tool",
            "\",",
            " \"",
            "content",
            "\":",
            // Tokens 14..21: one event each
            " \"", // opens the content string (empty event)
            "I",
            " am",
            " a",
            " helpful",
            " assistant",
            "!\\\"\"", // escaped quote inside the string must survive, closing quote must not
            // Closing braces, ignored
            "}",
            "}",
        ]);

        // Initial ignored output
        assert_silent(&mut chat_state, &tokens[..14]);

        // No tool output
        let output = collect_text(&mut chat_state, &tokens[14..14 + 7]);
        assert_eq!(output, "I am a helpful assistant!\"");

        // No tool finish
        assert_silent(&mut chat_state, &tokens[14 + 7..]);
    }

    #[test]
    fn test_chat_stream_tool_no_tool_inline_json() {
        let mut chat_state = chat_state(true, true);

        let tokens = stream_tokens(&[
            // Tokens 0..14: JSON prefix, buffered without any event
            "{\"",
            "function",
            "\":",
            " {\"",
            "_",
            "name",
            "\":",
            " \"",
            "no",
            "_tool",
            "\",",
            " \"",
            "content",
            "\":",
            // Tokens 14..26: one event each, the content is itself escaped JSON
            " \"", // opens the content string (empty event)
            "{\\\"",
            "a",
            "\\\":",
            "2",
            ",\\",
            "\"",
            "b",
            "\\\": ",
            "1",
            "}",
            "\"}", // closes the content string (empty event)
            // Closing brace, ignored
            "}",
        ]);

        // Initial ignored output
        assert_silent(&mut chat_state, &tokens[..14]);

        // No tool output
        let output = collect_text(&mut chat_state, &tokens[14..14 + 12]);
        assert_eq!(output, "{\"a\":2,\"b\": 1}");

        // No tool finish
        assert_silent(&mut chat_state, &tokens[14 + 12..]);
    }

    #[test]
    fn test_chat_stream_tool_no_tool_empty() {
        let mut chat_state = chat_state(true, true);

        let tokens = stream_tokens(&[
            // Tokens 0..13: JSON prefix, buffered without any event
            "{\"",
            "function",
            "\":",
            " {\"",
            "_",
            "name",
            "\":",
            " \"",
            "no",
            "_tool",
            "\",",
            " \"",
            "content",
            // Tokens 13..15: one (empty) event each
            "\":\"", // opens the content string
            "\"}",   // closes it immediately
            // Closing brace, ignored
            "}",
        ]);

        // Initial ignored output
        assert_silent(&mut chat_state, &tokens[..13]);

        // No tool output
        let output = collect_text(&mut chat_state, &tokens[13..13 + 2]);
        assert_eq!(output, "");

        // No tool finish
        assert_silent(&mut chat_state, &tokens[13 + 2..]);
    }

    #[test]
    fn test_chat_stream_tool_get_weather() {
        let mut chat_state = chat_state(true, true);

        let tokens = stream_tokens(&[
            // Tokens 0..11: JSON prefix, buffered without any event
            "{\"",
            "function",
            "\":",
            " {\"",
            "_",
            "name",
            "\":",
            " \"",
            "get",
            "_current",
            "_weather",
            // Tokens 11..28: one tool-call event each
            "\",", // function name known: event carries the name and the opening "{"
            " \"",
            "location",
            "\":",
            " \"",
            "San",
            " Francisco",
            ",",
            " CA",
            "\",",
            " \"",
            "format",
            "\":",
            " \"",
            "c",
            "elsius",
            "\"}}", // last event, the brace closing the wrapper is removed
        ]);

        // Initial ignored output
        for token in &tokens[..11] {
            let events = chat_state.push(token.clone());
            assert_eq!(events.len(), 0, "{events:?}");
        }

        // Tool call output
        let mut output = String::new();
        let mut output_name = String::new();
        for token in &tokens[11..11 + 17] {
            let events = chat_state.push(token.clone());
            assert_eq!(events.len(), 1);
            let (name, arguments) = get_tool_call_content(&events[0]);
            if let Some(name) = name {
                assert_eq!(name, "get_current_weather");
                output_name.push_str(name);
            }
            output.push_str(arguments);
        }

        assert_eq!(output_name, "get_current_weather");
        assert_eq!(
            output,
            "{ \"location\": \"San Francisco, CA\", \"format\": \"celsius\"}"
        );

        // Nothing left after the tool call
        for token in &tokens[11 + 17..] {
            let events = chat_state.push(token.clone());
            assert_eq!(events.len(), 0);
        }
    }
}

View File

@ -16,7 +16,7 @@ pub(crate) fn strftime_now(format_str: String) -> Result<String, minijinja::Erro
Ok(Local::now().format(&format_str).to_string()) Ok(Local::now().format(&format_str).to_string())
} }
#[derive(Clone)] #[derive(Debug, Clone)]
pub(crate) struct ChatTemplate { pub(crate) struct ChatTemplate {
template: Template<'static, 'static>, template: Template<'static, 'static>,
bos_token: Option<String>, bos_token: Option<String>,

View File

@ -52,7 +52,7 @@ pub struct Infer {
/// Request backend /// Request backend
backend: Arc<dyn Backend + Send + Sync>, backend: Arc<dyn Backend + Send + Sync>,
/// Chat template /// Chat template
chat_template: Option<ChatTemplate>, pub(crate) chat_template: Option<ChatTemplate>,
/// Inference limit /// Inference limit
limit_concurrent_requests: Arc<Semaphore>, limit_concurrent_requests: Arc<Semaphore>,
/// Backend health /// Backend health

View File

@ -8,6 +8,7 @@ pub mod validation;
mod kserve; mod kserve;
pub mod logging; pub mod logging;
mod chat;
mod sagemaker; mod sagemaker;
pub mod usage_stats; pub mod usage_stats;
mod vertex; mod vertex;
@ -20,6 +21,7 @@ use serde::{Deserialize, Serialize};
use tokenizers::Encoding; use tokenizers::Encoding;
use tracing::warn; use tracing::warn;
use utoipa::ToSchema; use utoipa::ToSchema;
use uuid::Uuid;
use validation::Validation; use validation::Validation;
#[allow(clippy::large_enum_variant)] #[allow(clippy::large_enum_variant)]
@ -541,6 +543,7 @@ pub(crate) struct Chunk {
} }
#[derive(Clone, Deserialize, Serialize, ToSchema)] #[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug))]
pub(crate) struct ChatCompletion { pub(crate) struct ChatCompletion {
pub id: String, pub id: String,
#[schema(example = "1706270835")] #[schema(example = "1706270835")]
@ -553,6 +556,7 @@ pub(crate) struct ChatCompletion {
} }
#[derive(Clone, Deserialize, Serialize, ToSchema)] #[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug))]
pub(crate) struct ChatCompletionComplete { pub(crate) struct ChatCompletionComplete {
pub index: u32, pub index: u32,
pub message: OutputMessage, pub message: OutputMessage,
@ -561,6 +565,7 @@ pub(crate) struct ChatCompletionComplete {
} }
#[derive(Clone, Deserialize, Serialize, ToSchema)] #[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionLogprobs { pub(crate) struct ChatCompletionLogprobs {
content: Vec<ChatCompletionLogprob>, content: Vec<ChatCompletionLogprob>,
} }
@ -619,6 +624,7 @@ impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
} }
#[derive(Clone, Deserialize, Serialize, ToSchema)] #[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionLogprob { pub(crate) struct ChatCompletionLogprob {
token: String, token: String,
logprob: f32, logprob: f32,
@ -626,12 +632,14 @@ pub(crate) struct ChatCompletionLogprob {
} }
#[derive(Clone, Deserialize, Serialize, ToSchema)] #[derive(Clone, Deserialize, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionTopLogprob { pub(crate) struct ChatCompletionTopLogprob {
token: String, token: String,
logprob: f32, logprob: f32,
} }
#[derive(Clone, Deserialize, Serialize, ToSchema, Default)] #[derive(Clone, Deserialize, Serialize, ToSchema, Default)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct Usage { pub(crate) struct Usage {
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
@ -640,6 +648,7 @@ pub(crate) struct Usage {
#[derive(Clone, Serialize, ToSchema)] #[derive(Clone, Serialize, ToSchema)]
#[serde(tag = "object")] #[serde(tag = "object")]
#[cfg_attr(test, derive(Debug))]
enum CompletionType { enum CompletionType {
#[serde(rename = "chat.completion.chunk")] #[serde(rename = "chat.completion.chunk")]
ChatCompletionChunk(ChatCompletionChunk), ChatCompletionChunk(ChatCompletionChunk),
@ -707,6 +716,7 @@ impl ChatCompletion {
} }
} }
#[derive(Clone, Serialize, ToSchema)] #[derive(Clone, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug))]
pub(crate) struct ChatCompletionChunk { pub(crate) struct ChatCompletionChunk {
pub id: String, pub id: String,
#[schema(example = "1706270978")] #[schema(example = "1706270978")]
@ -719,6 +729,7 @@ pub(crate) struct ChatCompletionChunk {
} }
#[derive(Clone, Serialize, ToSchema)] #[derive(Clone, Serialize, ToSchema)]
#[cfg_attr(test, derive(Debug, PartialEq))]
pub(crate) struct ChatCompletionChoice { pub(crate) struct ChatCompletionChoice {
pub index: u32, pub index: u32,
pub delta: ChatCompletionDelta, pub delta: ChatCompletionDelta,
@ -735,6 +746,7 @@ pub struct ToolCallDelta {
#[derive(Clone, Debug, Serialize, ToSchema)] #[derive(Clone, Debug, Serialize, ToSchema)]
#[serde(untagged)] #[serde(untagged)]
#[cfg_attr(test, derive(PartialEq))]
enum ChatCompletionDelta { enum ChatCompletionDelta {
Chat(TextMessage), Chat(TextMessage),
Tool(ToolCallDelta), Tool(ToolCallDelta),
@ -759,48 +771,17 @@ impl ChatCompletionChunk {
pub(crate) fn new( pub(crate) fn new(
model: String, model: String,
system_fingerprint: String, system_fingerprint: String,
delta: Option<String>,
tool_calls: Option<Vec<String>>,
created: u64, created: u64,
logprobs: Option<ChatCompletionLogprobs>, choices: Vec<ChatCompletionChoice>,
finish_reason: Option<String>, usage: Option<Usage>,
) -> Self { ) -> Self {
let delta = match (delta, tool_calls) {
(Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
role: "assistant".to_string(),
content: delta,
..Default::default()
}),
(None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta {
role: "assistant".to_string(),
tool_calls: vec![DeltaToolCall {
index: 0,
id: String::new(),
r#type: "function".to_string(),
function: Function {
name: None,
arguments: tool_calls[0].to_string(),
},
}],
}),
(None, None) => ChatCompletionDelta::Chat(TextMessage {
role: "assistant".to_string(),
content: "".to_string(),
..Default::default()
}),
};
Self { Self {
id: String::new(), id: String::new(),
created, created,
model, model,
system_fingerprint, system_fingerprint,
choices: vec![ChatCompletionChoice { choices,
index: 0, usage,
delta,
logprobs,
finish_reason,
}],
usage: None,
} }
} }
} }
@ -915,7 +896,7 @@ pub(crate) struct ChatRequest {
/// Options for streaming response. Only set this when you set stream: true. /// Options for streaming response. Only set this when you set stream: true.
#[serde(default)] #[serde(default)]
#[schema(nullable = true, example = "null")] #[schema(nullable = true, example = "null")]
pub stream_options: Option<StreamOptions>, pub stream_options: StreamOptions,
} }
impl ChatRequest { impl ChatRequest {
@ -1015,13 +996,37 @@ impl ChatRequest {
using_tools, using_tools,
)) ))
} }
fn next_int_id(&self) -> Result<String, Box<dyn std::error::Error>> {
let mut id: usize = 0;
for message in &self.messages {
if let MessageBody::Tool { tool_calls } = &message.body {
for tool_call in tool_calls {
let new_id: usize = tool_call.id.parse()?;
id = std::cmp::max(id, new_id + 1);
}
}
}
Ok(id.to_string())
} }
#[derive(Clone, Deserialize, ToSchema, Serialize)] /// Try to have linearly increasing id
/// or resort to using Uuid if the initial
/// scheme is not understood
fn next_tool_call_id(&self) -> String {
self.next_int_id().unwrap_or_else(|_| {
let uid = Uuid::new_v4().to_string();
uid.to_string()
})
}
}
#[derive(Clone, Deserialize, ToSchema, Serialize, Default)]
#[cfg_attr(test, derive(Debug, PartialEq))] #[cfg_attr(test, derive(Debug, PartialEq))]
struct StreamOptions { struct StreamOptions {
/// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value. /// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
#[schema(example = "true")] #[schema(example = "true")]
#[serde(default)]
include_usage: bool, include_usage: bool,
} }
@ -1445,7 +1450,7 @@ pub(crate) struct ChatTokenizeResponse {
#[serde(transparent)] #[serde(transparent)]
pub(crate) struct TokenizeResponse(Vec<SimpleToken>); pub(crate) struct TokenizeResponse(Vec<SimpleToken>);
#[derive(Serialize, ToSchema)] #[derive(Serialize, ToSchema, Clone)]
pub(crate) struct StreamDetails { pub(crate) struct StreamDetails {
#[schema(example = "length")] #[schema(example = "length")]
pub finish_reason: FinishReason, pub finish_reason: FinishReason,
@ -1457,7 +1462,7 @@ pub(crate) struct StreamDetails {
pub input_length: u32, pub input_length: u32,
} }
#[derive(Serialize, ToSchema)] #[derive(Serialize, ToSchema, Clone)]
pub(crate) struct StreamResponse { pub(crate) struct StreamResponse {
pub index: u32, pub index: u32,
pub token: Token, pub token: Token,
@ -1700,9 +1705,25 @@ mod tests {
assert!(matches!( assert!(matches!(
request.stream_options, request.stream_options,
Some(StreamOptions { StreamOptions {
include_usage: true include_usage: true
}) }
));
let json = json!({
"model": "",
"messages": [{
"role": "user",
"content": "Hello"
}]
});
let request: ChatRequest = serde_json::from_str(json.to_string().as_str()).unwrap();
assert!(matches!(
request.stream_options,
StreamOptions {
include_usage: false
}
)); ));
} }

View File

@ -1,3 +1,4 @@
use crate::chat::ChatState;
/// HTTP Server logic /// HTTP Server logic
use crate::config::Config; use crate::config::Config;
use crate::infer::{Backend, Infer, InferError, InferResponse, InferStreamResponse}; use crate::infer::{Backend, Infer, InferError, InferResponse, InferStreamResponse};
@ -47,8 +48,6 @@ use http::header::AUTHORIZATION;
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle}; use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::IntoPyDict; use pyo3::types::IntoPyDict;
use regex::Regex;
use serde_json::Value;
use std::convert::Infallible; use std::convert::Infallible;
use std::fs::File; use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
@ -1114,62 +1113,6 @@ pub(crate) async fn completions(
} }
} }
enum StreamState {
Buffering,
BufferTrailing,
Content { skip_close_quote: bool },
}
/// Convert a StreamResponse into an Event to be sent over SSE
fn create_event_from_stream_token(
stream_token: &StreamResponse,
logprobs: bool,
inner_using_tools: bool,
system_fingerprint: String,
model_id: String,
) -> Event {
let event = Event::default();
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let logprobs = logprobs.then(|| {
ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens.clone()))
});
// replace the content with the tool calls if grammar is present
let (content, tool_calls) = if inner_using_tools {
(None, Some(vec![stream_token.token.text.clone()]))
} else {
let content = if !stream_token.token.special {
Some(stream_token.token.text.clone())
} else {
None
};
(content, None)
};
let finish_reason = stream_token
.details
.as_ref()
.map(|details| details.finish_reason.format(true));
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
model_id.clone(),
system_fingerprint.clone(),
content,
tool_calls,
current_time,
logprobs,
finish_reason,
));
event.json_data(chat_complete).unwrap_or_else(|e| {
println!("Failed to serialize ChatCompletionChunk: {:?}", e);
Event::default()
})
}
/// Generate tokens /// Generate tokens
#[utoipa::path( #[utoipa::path(
post, post,
@ -1219,6 +1162,9 @@ pub(crate) async fn chat_completions(
logprobs, logprobs,
.. ..
} = chat.clone(); } = chat.clone();
tracing::debug!("Got chat_template {:?}", infer.chat_template);
let id = chat.next_tool_call_id();
let (generate_request, using_tools): (GenerateRequest, bool) = let (generate_request, using_tools): (GenerateRequest, bool) =
chat.try_into_generate(&infer)?; chat.try_into_generate(&infer)?;
span.record("parameters", format!("{:?}", generate_request.parameters)); span.record("parameters", format!("{:?}", generate_request.parameters));
@ -1235,165 +1181,19 @@ pub(crate) async fn chat_completions(
let (headers, response_stream) = let (headers, response_stream) =
generate_stream_internal(infer, compute_type, Json(generate_request), span).await; generate_stream_internal(infer, compute_type, Json(generate_request), span).await;
// regex to match any function name
let function_regex = match Regex::new(r#"\{"function":\{"_name":"([^"]+)""#) {
Ok(regex) => regex,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to compile regex: {}", e),
error_type: "regex".to_string(),
}),
))
}
};
let response_stream = async_stream::stream! { let response_stream = async_stream::stream! {
let mut response_stream = Box::pin(response_stream); let mut response_stream = Box::pin(response_stream);
let mut buffer = Vec::new(); let mut state = ChatState::new(using_tools, stream_options, system_fingerprint, model_id, logprobs, id);
let mut json_buffer = String::new();
let mut state = if using_tools {
StreamState::Buffering
} else {
StreamState::Content {
skip_close_quote: false,
}
};
let mut response_as_tool = using_tools;
while let Some(result) = response_stream.next().await { while let Some(result) = response_stream.next().await {
match result{ match result{
Ok(stream_token) => { Ok(stream_token) => {
let token_text = &stream_token.token.text.clone(); let events = state.push(stream_token);
let usage = stream_token.details.as_ref().map(|details| { for chat_complete in events{
let completion_tokens = details.generated_tokens; yield Ok(Event::default().json_data(chat_complete).unwrap_or_else(|e| {
let prompt_tokens = details.input_length; tracing::error!("Failed to serialize ChatCompletionChunk: {:?}", e);
let total_tokens = prompt_tokens + completion_tokens; Event::default()
Usage {
completion_tokens,
prompt_tokens,
total_tokens,
}
});
match state {
StreamState::Buffering => {
json_buffer.push_str(&token_text.replace(" ", ""));
buffer.push(stream_token);
if let Some(captures) = function_regex.captures(&json_buffer) {
let function_name = captures[1].to_string();
if function_name == "no_tool" {
state = StreamState::BufferTrailing;
response_as_tool = false;
buffer.clear();
json_buffer.clear();
} else {
state = StreamState::Content {
skip_close_quote: false,
};
// send all the buffered messages
for stream_token in &buffer {
let event = create_event_from_stream_token(
stream_token,
logprobs,
response_as_tool,
system_fingerprint.clone(),
model_id.clone(),
);
yield Ok::<Event, Infallible>(event);
}
}
}
}
// if we skipped sending the buffer we need to avoid sending the following json key and quotes
StreamState::BufferTrailing => {
let infix_text = "\"content\":\"";
json_buffer.push_str(&token_text.replace(" ", ""));
// keep capturing until we find the infix text
match json_buffer.find(infix_text) {
Some(content_key_index) => {
json_buffer =
json_buffer[content_key_index + infix_text.len()..].to_string();
}
None => {
continue;
}
}
// if there is leftover text after removing the infix text, we need to send it
if !json_buffer.is_empty() {
let event = Event::default();
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let chat_complete =
CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
model_id.clone(),
system_fingerprint.clone(),
Some(json_buffer.clone()),
None,
current_time,
None,
None,
));
yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| {
InferError::StreamSerializationError(e.to_string()).into()
})); }));
} }
// cleanup the buffers
buffer.clear();
json_buffer.clear();
state = StreamState::Content {
skip_close_quote: true,
};
}
StreamState::Content { skip_close_quote } => {
if skip_close_quote && token_text.contains('"') {
break;
}
// send the content
let event = create_event_from_stream_token(
&stream_token,
logprobs,
response_as_tool,
system_fingerprint.clone(),
model_id.clone(),
);
yield Ok::<Event, Infallible>(event);
}
}
let should_send_usage = usage.is_some()
&& stream_options
.as_ref()
.is_some_and(|opts| opts.include_usage);
if should_send_usage {
let usage_data = usage.unwrap();
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
.as_secs();
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
id: String::new(),
created: current_time,
model: model_id.clone(),
system_fingerprint: system_fingerprint.clone(),
choices: vec![],
usage: Some(Usage {
prompt_tokens: usage_data.prompt_tokens,
completion_tokens: usage_data.completion_tokens,
total_tokens: usage_data.total_tokens,
}),
});
yield Ok(Event::default()
.json_data(chat_complete)
.unwrap_or_else(|e| InferError::StreamSerializationError(e.to_string()).into()));
}
} }
Err(err) => yield Ok(err.into_openai_event()) Err(err) => yield Ok(err.into_openai_event())
} }
@ -1413,56 +1213,7 @@ pub(crate) async fn chat_completions(
.as_secs(); .as_secs();
let (tool_calls, output) = if using_tools { let (tool_calls, output) = if using_tools {
let gen_text_value: Value = crate::chat::parse_output(&generation.generated_text)?
serde_json::from_str(&generation.generated_text).map_err(|e| {
InferError::ToolError(format!(
"Failed to parse generated text: {} {:?}",
e, generation.generated_text
))
})?;
let function = gen_text_value.get("function").ok_or(InferError::ToolError(
"No function found in generated text".to_string(),
))?;
let name = function
.get("_name")
.and_then(Value::as_str)
.ok_or(InferError::ToolError(
"No _name found in generated text".to_string(),
))?
.to_string();
let mut arguments = function.clone();
if let Value::Object(ref mut props) = arguments {
props.remove("_name");
}
match name.as_str() {
"no_tool" => {
// parse the content message
let content_message = arguments
.get("content")
.and_then(Value::as_str)
.ok_or_else(|| {
InferError::ToolError(
"No `content` found in generated text".to_string(),
)
})?
.to_string();
(None, Some(content_message))
}
_ => {
let tool_calls = vec![ToolCall {
id: "0".to_string(),
r#type: "function".to_string(),
function: FunctionDefinition {
description: None,
name,
arguments,
},
}];
(Some(tool_calls), None)
}
}
} else { } else {
(None, Some(generation.generated_text)) (None, Some(generation.generated_text))
}; };
@ -1817,6 +1568,7 @@ pub async fn run(
) )
} }
Type::Cache(cache) => { Type::Cache(cache) => {
tracing::info!("Cache {cache:?}");
let repo = cache.repo(Repo::with_revision( let repo = cache.repo(Repo::with_revision(
tokenizer_name.to_string(), tokenizer_name.to_string(),
RepoType::Model, RepoType::Model,
@ -1833,6 +1585,7 @@ pub async fn run(
}; };
// Read the JSON contents of the file as an instance of 'HubTokenizerConfig'. // Read the JSON contents of the file as an instance of 'HubTokenizerConfig'.
tracing::warn!("Tokenizer_config {tokenizer_config_path:?} - {tokenizer_config_filename:?}");
let tokenizer_config: Option<HubTokenizerConfig> = if let Some(filename) = tokenizer_config_path let tokenizer_config: Option<HubTokenizerConfig> = if let Some(filename) = tokenizer_config_path
{ {
HubTokenizerConfig::from_file(filename) HubTokenizerConfig::from_file(filename)

View File

@ -542,6 +542,7 @@ class Qwen2VLForConditionalGeneration(nn.Module):
max_s=max_s, max_s=max_s,
true_max_s=max_s, true_max_s=max_s,
prefill_cache_indices=prefill_cache_indices, prefill_cache_indices=prefill_cache_indices,
adapter_data=adapter_data,
) )
if lm_head_indices is not None: if lm_head_indices is not None:
hidden_states = hidden_states[lm_head_indices] hidden_states = hidden_states[lm_head_indices]