mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
Fix tool call3 (#3086)
* Fixing the tool calling convention. * Update tehe doc. * Fixing some corner cases. * Fixing the tool call id. * Fmt. * Snapshot update with the new updated tool_call_id. * More qwen2.
This commit is contained in:
parent
ae4451c3da
commit
f74c36fe0d
@ -29,7 +29,7 @@ homepage = "https://github.com/huggingface/text-generation-inference"
|
|||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
base64 = "0.22.0"
|
base64 = "0.22.0"
|
||||||
tokenizers = { version = "0.20.0", features = ["http"] }
|
tokenizers = { version = "0.20.0", features = ["http"] }
|
||||||
hf-hub = { version = "0.4.1", features = ["tokio"] }
|
hf-hub = { version = "0.4.2", features = ["tokio"] }
|
||||||
metrics = { version = "0.23.0" }
|
metrics = { version = "0.23.0" }
|
||||||
metrics-exporter-prometheus = { version = "0.15.1", features = [] }
|
metrics-exporter-prometheus = { version = "0.15.1", features = [] }
|
||||||
minijinja = { version = "2.2.0", features = ["json"] }
|
minijinja = { version = "2.2.0", features = ["json"] }
|
||||||
|
@ -2148,9 +2148,6 @@
|
|||||||
},
|
},
|
||||||
"StreamOptions": {
|
"StreamOptions": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
|
||||||
"include_usage"
|
|
||||||
],
|
|
||||||
"properties": {
|
"properties": {
|
||||||
"include_usage": {
|
"include_usage": {
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}",
|
"arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
|
||||||
"description": null,
|
"description": null,
|
||||||
"name": "get_current_weather"
|
"name": "get_current_weather"
|
||||||
},
|
},
|
||||||
@ -21,7 +21,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263682,
|
"created": 1741372434,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}",
|
"arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
|
||||||
"description": null,
|
"description": null,
|
||||||
"name": "get_current_weather"
|
"name": "get_current_weather"
|
||||||
},
|
},
|
||||||
@ -21,7 +21,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263684,
|
"created": 1741372657,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
|
@ -8,10 +8,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "{\"",
|
"arguments": "{",
|
||||||
"name": null
|
"name": "get_current_weather"
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -22,187 +22,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "function",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "\":",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": " {\"",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "_",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "name",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "\":",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -221,7 +41,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -232,157 +52,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "get",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "_current",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "_weather",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "\",",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": " \"",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -401,7 +71,7 @@
|
|||||||
"arguments": "location",
|
"arguments": "location",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -412,7 +82,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -431,7 +101,7 @@
|
|||||||
"arguments": "\":",
|
"arguments": "\":",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -442,7 +112,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -461,7 +131,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -472,7 +142,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -488,10 +158,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "Paris",
|
"arguments": "Bro",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -502,7 +172,37 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "oklyn",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "0",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -521,7 +221,7 @@
|
|||||||
"arguments": ",",
|
"arguments": ",",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -532,7 +232,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -548,10 +248,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": " France",
|
"arguments": " NY",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -562,7 +262,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -581,7 +281,7 @@
|
|||||||
"arguments": "\",",
|
"arguments": "\",",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -592,7 +292,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -611,7 +311,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -622,7 +322,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -641,7 +341,7 @@
|
|||||||
"arguments": "format",
|
"arguments": "format",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -652,7 +352,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -671,7 +371,7 @@
|
|||||||
"arguments": "\":",
|
"arguments": "\":",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -682,7 +382,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -701,7 +401,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -712,7 +412,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -728,10 +428,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "c",
|
"arguments": "f",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -742,7 +442,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -758,10 +458,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "elsius",
|
"arguments": "ahrenheit",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -772,7 +472,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -788,10 +488,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "\"}}",
|
"arguments": "\"}",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -802,37 +502,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263685,
|
"created": 1741688515,
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "<|eot_id|>",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": "stop",
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263685,
|
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
|
@ -1,4 +1,24 @@
|
|||||||
[
|
[
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741364571,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
@ -12,7 +32,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263687,
|
"created": 1741364571,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -32,7 +52,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263687,
|
"created": 1741364571,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -52,7 +72,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263687,
|
"created": 1741364571,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -72,7 +92,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263687,
|
"created": 1741364571,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -92,7 +112,27 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263687,
|
"created": 1741364571,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "!",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741364571,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}",
|
"arguments": "{\"location\":\"Brooklyn, NY\",\"format\":\"fahrenheit\"}",
|
||||||
"description": null,
|
"description": null,
|
||||||
"name": "get_current_weather"
|
"name": "get_current_weather"
|
||||||
},
|
},
|
||||||
@ -21,7 +21,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263680,
|
"created": 1741372335,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
|
@ -10,10 +10,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "{\"",
|
"arguments": "{",
|
||||||
"name": null
|
"name": "get_current_weather"
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -24,205 +24,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "function",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "\":",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": " {\"",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "_",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "name",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "\":",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -244,7 +46,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -255,172 +57,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "get",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "_current",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "_weather",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "\",",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": " \"",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": null,
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -442,7 +79,7 @@
|
|||||||
"arguments": "location",
|
"arguments": "location",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -453,7 +90,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -475,7 +112,7 @@
|
|||||||
"arguments": "\":",
|
"arguments": "\":",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -486,7 +123,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -508,7 +145,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -519,7 +156,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -541,7 +178,7 @@
|
|||||||
"arguments": "Bro",
|
"arguments": "Bro",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -552,7 +189,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -574,7 +211,7 @@
|
|||||||
"arguments": "oklyn",
|
"arguments": "oklyn",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -585,7 +222,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -607,7 +244,7 @@
|
|||||||
"arguments": ",",
|
"arguments": ",",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -618,7 +255,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -640,7 +277,7 @@
|
|||||||
"arguments": " NY",
|
"arguments": " NY",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -651,7 +288,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -673,7 +310,7 @@
|
|||||||
"arguments": "\",",
|
"arguments": "\",",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -684,7 +321,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -706,7 +343,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -717,7 +354,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -739,7 +376,7 @@
|
|||||||
"arguments": "format",
|
"arguments": "format",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -750,7 +387,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -772,7 +409,7 @@
|
|||||||
"arguments": "\":",
|
"arguments": "\":",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -783,7 +420,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -805,7 +442,7 @@
|
|||||||
"arguments": " \"",
|
"arguments": " \"",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -816,7 +453,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -838,7 +475,7 @@
|
|||||||
"arguments": "f",
|
"arguments": "f",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -849,7 +486,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -871,7 +508,7 @@
|
|||||||
"arguments": "ahrenheit",
|
"arguments": "ahrenheit",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -882,7 +519,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -901,10 +538,10 @@
|
|||||||
"tool_calls": [
|
"tool_calls": [
|
||||||
{
|
{
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": "\"}}",
|
"arguments": "\"}",
|
||||||
"name": null
|
"name": null
|
||||||
},
|
},
|
||||||
"id": "",
|
"id": "0",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
@ -915,40 +552,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263681,
|
"created": 1741689423,
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"service_tier": null,
|
|
||||||
"system_fingerprint": "3.1.2-dev0-native",
|
|
||||||
"usage": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"delta": {
|
|
||||||
"content": null,
|
|
||||||
"function_call": null,
|
|
||||||
"refusal": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"function": {
|
|
||||||
"arguments": "<|eot_id|>",
|
|
||||||
"name": null
|
|
||||||
},
|
|
||||||
"id": "",
|
|
||||||
"index": 0,
|
|
||||||
"type": "function"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": "stop",
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1741263681,
|
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
|
@ -1,4 +1,24 @@
|
|||||||
[
|
[
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741371722,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
@ -12,7 +32,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -32,7 +52,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -52,7 +72,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -72,7 +92,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -92,7 +112,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -112,7 +132,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -132,7 +152,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -152,7 +172,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -172,7 +192,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -192,7 +212,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -212,7 +232,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -232,7 +252,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -252,7 +272,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -272,7 +292,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -292,7 +312,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -312,7 +332,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -332,7 +352,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371722,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -352,7 +372,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -372,7 +392,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -392,7 +412,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -412,7 +432,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -432,7 +452,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -452,7 +472,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -472,7 +492,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -492,7 +512,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263688,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -512,7 +532,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -532,7 +552,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -552,7 +572,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -572,7 +592,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -592,7 +612,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -612,7 +632,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -632,7 +652,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -652,7 +672,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -672,7 +692,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -692,7 +712,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -712,7 +732,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -732,7 +752,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -752,7 +772,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -772,7 +792,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -792,7 +812,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -812,7 +832,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -832,7 +852,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -852,7 +872,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -872,7 +892,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -892,7 +912,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -912,7 +932,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -932,7 +952,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371723,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -952,7 +972,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -972,7 +992,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -992,7 +1012,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1012,7 +1032,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1032,7 +1052,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1052,7 +1072,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1072,7 +1092,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263689,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1092,7 +1112,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1112,7 +1132,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1132,7 +1152,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1152,7 +1172,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1172,7 +1192,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1192,7 +1212,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1212,7 +1232,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1232,7 +1252,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1252,7 +1272,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1272,7 +1292,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1292,7 +1312,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1312,7 +1332,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1332,7 +1352,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1352,7 +1372,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1372,7 +1392,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1392,7 +1412,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1412,7 +1432,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1432,7 +1452,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1452,7 +1472,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1472,7 +1492,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1492,7 +1512,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1512,7 +1532,7 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -1532,7 +1552,27 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1741263690,
|
"created": 1741371724,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": ".",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741371725,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -108,7 +108,7 @@ async def test_flash_llama_grammar_tools_nostream(
|
|||||||
function=ChatCompletionOutputFunctionDefinition(
|
function=ChatCompletionOutputFunctionDefinition(
|
||||||
description=None,
|
description=None,
|
||||||
name="get_current_weather",
|
name="get_current_weather",
|
||||||
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}',
|
arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
@ -142,14 +142,15 @@ async def test_flash_llama_grammar_tools_openai(
|
|||||||
|
|
||||||
chunks = []
|
chunks = []
|
||||||
tool = ""
|
tool = ""
|
||||||
|
name = ""
|
||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
|
if chunk.choices[0].delta.tool_calls[0].function.name:
|
||||||
|
name += chunk.choices[0].delta.tool_calls[0].function.name
|
||||||
tool += chunk.choices[0].delta.tool_calls[0].function.arguments
|
tool += chunk.choices[0].delta.tool_calls[0].function.arguments
|
||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
|
|
||||||
assert (
|
assert name == "get_current_weather"
|
||||||
tool
|
assert tool == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}'
|
||||||
== '{"function": {"_name": "get_current_weather", "location": "Brooklyn, NY", "format": "fahrenheit"}}<|eot_id|>'
|
|
||||||
)
|
|
||||||
assert chunks == response_snapshot
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
@ -184,7 +185,7 @@ async def test_flash_llama_grammar_tools_auto_nostream(
|
|||||||
function=ChatCompletionOutputFunctionDefinition(
|
function=ChatCompletionOutputFunctionDefinition(
|
||||||
description=None,
|
description=None,
|
||||||
name="get_current_weather",
|
name="get_current_weather",
|
||||||
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}',
|
arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
@ -223,7 +224,7 @@ async def test_flash_llama_grammar_tools_choice_nostream(
|
|||||||
function=ChatCompletionOutputFunctionDefinition(
|
function=ChatCompletionOutputFunctionDefinition(
|
||||||
description=None,
|
description=None,
|
||||||
name="get_current_weather",
|
name="get_current_weather",
|
||||||
arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}',
|
arguments='{"location":"Brooklyn, NY","format":"fahrenheit"}',
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
@ -250,23 +251,24 @@ async def test_flash_llama_grammar_tools_choice_stream(
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "What is the weather like in Paris, France?",
|
"content": "What is the weather like in Brooklyn, New York?",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
tool_calls_generated = ""
|
arguments = ""
|
||||||
chunks = []
|
chunks = []
|
||||||
|
name = ""
|
||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
|
if chunk.choices[0].delta.tool_calls[0].function.name:
|
||||||
|
name += chunk.choices[0].delta.tool_calls[0].function.name
|
||||||
|
arguments += chunk.choices[0].delta.tool_calls[0].function.arguments
|
||||||
assert chunk.choices[0].delta.content is None
|
assert chunk.choices[0].delta.content is None
|
||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
|
|
||||||
assert (
|
assert name == "get_current_weather"
|
||||||
tool_calls_generated
|
assert arguments == '{ "location": "Brooklyn, NY", "format": "fahrenheit"}'
|
||||||
== '{"function": {"_name": "get_current_weather", "location": "Paris, France", "format": "celsius"}}<|eot_id|>'
|
|
||||||
)
|
|
||||||
assert chunks == response_snapshot
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
@ -297,8 +299,6 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream(
|
|||||||
content_generated = response.choices[0].message.content
|
content_generated = response.choices[0].message.content
|
||||||
assert response.choices[0].message.tool_calls is None
|
assert response.choices[0].message.tool_calls is None
|
||||||
|
|
||||||
######## FIXME before MERGE ############################
|
|
||||||
# TODO This is different from the streaming case, this is NOT normal.
|
|
||||||
assert content_generated == "I am a helpful assistant!"
|
assert content_generated == "I am a helpful assistant!"
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
@ -334,7 +334,8 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream(
|
|||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
assert chunk.choices[0].delta.tool_calls is None
|
assert chunk.choices[0].delta.tool_calls is None
|
||||||
|
|
||||||
assert content_generated == "I am a helpful assistant"
|
######## This is exactly the same as the non streaming case
|
||||||
|
assert content_generated == "I am a helpful assistant!"
|
||||||
assert chunks == response_snapshot
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
@ -371,7 +372,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto(
|
|||||||
|
|
||||||
assert (
|
assert (
|
||||||
content_generated
|
content_generated
|
||||||
== "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle"
|
== "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle."
|
||||||
)
|
)
|
||||||
assert chunks == response_snapshot
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
@ -401,14 +402,18 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
|
|||||||
)
|
)
|
||||||
|
|
||||||
tool_calls_generated = ""
|
tool_calls_generated = ""
|
||||||
|
name = ""
|
||||||
chunks = []
|
chunks = []
|
||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
assert chunk.choices[0].delta.content is None
|
assert chunk.choices[0].delta.content is None
|
||||||
|
if chunk.choices[0].delta.tool_calls[0].function.name:
|
||||||
|
name += chunk.choices[0].delta.tool_calls[0].function.name
|
||||||
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
|
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
|
||||||
|
|
||||||
|
assert name == "get_n_day_weather_forecast"
|
||||||
assert (
|
assert (
|
||||||
tool_calls_generated
|
tool_calls_generated
|
||||||
== '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}}<|eot_id|>'
|
== '{ "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}'
|
||||||
)
|
)
|
||||||
assert chunks == response_snapshot
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
@ -479,12 +484,17 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
|
|||||||
)
|
)
|
||||||
chunks = []
|
chunks = []
|
||||||
tool_calls_generated = ""
|
tool_calls_generated = ""
|
||||||
|
name = ""
|
||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
|
assert chunk.choices[0].delta.content is None
|
||||||
|
if chunk.choices[0].delta.tool_calls[0].function.name:
|
||||||
|
name += chunk.choices[0].delta.tool_calls[0].function.name
|
||||||
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
|
tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments
|
||||||
chunks.append(chunk)
|
|
||||||
|
assert name == "get_n_day_weather_forecast"
|
||||||
assert (
|
assert (
|
||||||
tool_calls_generated
|
tool_calls_generated
|
||||||
== '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days": 3}}<|eot_id|>'
|
== '{ "location": "San Francisco, CA", "format": "celsius", "num_days": 3}'
|
||||||
)
|
)
|
||||||
assert chunks == response_snapshot
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
978
router/src/chat.rs
Normal file
978
router/src/chat.rs
Normal file
@ -0,0 +1,978 @@
|
|||||||
|
use crate::{
|
||||||
|
infer::InferError, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionDelta,
|
||||||
|
ChatCompletionLogprobs, CompletionType, DeltaToolCall, Function, FunctionDefinition,
|
||||||
|
StreamOptions, StreamResponse, TextMessage, ToolCallDelta, Usage,
|
||||||
|
};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
enum _NoTool {
|
||||||
|
NoTool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct NoToolCall {
|
||||||
|
_name: _NoTool,
|
||||||
|
content: String,
|
||||||
|
}
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct NoTool {
|
||||||
|
function: NoToolCall,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct ToolCall {
|
||||||
|
_name: String,
|
||||||
|
#[serde(flatten, default)]
|
||||||
|
/// Using Map to preserve order
|
||||||
|
arguments: serde_json::Map<String, Value>,
|
||||||
|
}
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct Call {
|
||||||
|
function: ToolCall,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn parse_output(
|
||||||
|
generated_text: &str,
|
||||||
|
) -> Result<(Option<Vec<crate::ToolCall>>, Option<String>), InferError> {
|
||||||
|
let call: Call = serde_json::from_str(generated_text).map_err(|e| {
|
||||||
|
InferError::ToolError(format!(
|
||||||
|
"Failed to parse generated text: {} {:?}",
|
||||||
|
e, generated_text
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
let name = call.function._name;
|
||||||
|
|
||||||
|
match &name[..] {
|
||||||
|
"no_tool" => {
|
||||||
|
// parse the content message
|
||||||
|
let content_message = call
|
||||||
|
.function
|
||||||
|
.arguments
|
||||||
|
.get("content")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.ok_or_else(|| {
|
||||||
|
InferError::ToolError("No `content` found in generated text".to_string())
|
||||||
|
})?
|
||||||
|
.to_string();
|
||||||
|
Ok((None, Some(content_message)))
|
||||||
|
}
|
||||||
|
name => {
|
||||||
|
let tool_calls = vec![crate::ToolCall {
|
||||||
|
id: "0".to_string(),
|
||||||
|
r#type: "function".to_string(),
|
||||||
|
function: FunctionDefinition {
|
||||||
|
description: None,
|
||||||
|
name: name.to_string(),
|
||||||
|
arguments: serde_json::to_value(call.function.arguments).map_err(|err| {
|
||||||
|
InferError::ToolError(format!(
|
||||||
|
"Could not convert arguments to JSON map {err}"
|
||||||
|
))
|
||||||
|
})?,
|
||||||
|
},
|
||||||
|
}];
|
||||||
|
Ok((Some(tool_calls), None))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert a StreamResponse into an Event to be sent over SSE
|
||||||
|
fn create_event_from_stream_token(
|
||||||
|
stream_token: &StreamResponse,
|
||||||
|
logprobs: bool,
|
||||||
|
inner_using_tools: bool,
|
||||||
|
system_fingerprint: String,
|
||||||
|
model_id: String,
|
||||||
|
function_name: Option<String>,
|
||||||
|
id: String,
|
||||||
|
) -> CompletionType {
|
||||||
|
let current_time = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
|
||||||
|
.as_secs();
|
||||||
|
|
||||||
|
let logprobs = logprobs.then(|| {
|
||||||
|
ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens.clone()))
|
||||||
|
});
|
||||||
|
|
||||||
|
// replace the content with the tool calls if grammar is present
|
||||||
|
let content = if !stream_token.token.special {
|
||||||
|
Some(stream_token.token.text.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
let (content, tool_calls) = if inner_using_tools {
|
||||||
|
// Cast into a vec
|
||||||
|
(None, content)
|
||||||
|
} else {
|
||||||
|
(content, None)
|
||||||
|
};
|
||||||
|
let finish_reason = stream_token
|
||||||
|
.details
|
||||||
|
.as_ref()
|
||||||
|
.map(|details| details.finish_reason.format(true));
|
||||||
|
let delta = match (content, tool_calls) {
|
||||||
|
(Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
|
||||||
|
role: "assistant".to_string(),
|
||||||
|
content: delta,
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
(None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta {
|
||||||
|
role: "assistant".to_string(),
|
||||||
|
tool_calls: vec![DeltaToolCall {
|
||||||
|
index: 0,
|
||||||
|
id,
|
||||||
|
r#type: "function".to_string(),
|
||||||
|
function: Function {
|
||||||
|
name: function_name,
|
||||||
|
arguments: tool_calls,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}),
|
||||||
|
(None, None) => ChatCompletionDelta::Chat(TextMessage {
|
||||||
|
role: "assistant".to_string(),
|
||||||
|
content: "".to_string(),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
let choices = vec![ChatCompletionChoice {
|
||||||
|
index: 0,
|
||||||
|
delta,
|
||||||
|
logprobs,
|
||||||
|
finish_reason,
|
||||||
|
}];
|
||||||
|
CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
|
||||||
|
model_id,
|
||||||
|
system_fingerprint,
|
||||||
|
current_time,
|
||||||
|
choices,
|
||||||
|
None,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Phase of the streaming state machine that turns generated tokens into chat events.
#[derive(Debug)]
enum StreamState {
    /// Tokens are being accumulated; the tool call has not been parsed yet
    Buffering,
    /// A tool call was detected; tokens are its arguments
    Tool,
    /// Inside the `content` string of a `no_tool` call
    NoTool,
    /// The `no_tool` content is finished; remaining tokens are ignored
    NoToolFinish,
    /// Plain generation without tool calling
    Content,
}
|
||||||
|
|
||||||
|
pub struct ChatState {
|
||||||
|
state: StreamState,
|
||||||
|
text: String,
|
||||||
|
options: StreamOptions,
|
||||||
|
model_id: String,
|
||||||
|
fingerprint: String,
|
||||||
|
logprobs: bool,
|
||||||
|
id: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChatState {
|
||||||
|
pub fn new(
|
||||||
|
using_tools: bool,
|
||||||
|
options: StreamOptions,
|
||||||
|
fingerprint: String,
|
||||||
|
model_id: String,
|
||||||
|
logprobs: bool,
|
||||||
|
id: String,
|
||||||
|
) -> Self {
|
||||||
|
let state = if using_tools {
|
||||||
|
StreamState::Buffering
|
||||||
|
} else {
|
||||||
|
StreamState::Content
|
||||||
|
};
|
||||||
|
let text = String::new();
|
||||||
|
Self {
|
||||||
|
state,
|
||||||
|
text,
|
||||||
|
options,
|
||||||
|
fingerprint,
|
||||||
|
model_id,
|
||||||
|
logprobs,
|
||||||
|
id,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push(&mut self, mut stream_token: StreamResponse) -> Vec<CompletionType> {
|
||||||
|
let mut events = vec![];
|
||||||
|
let token_text = &stream_token.token.text;
|
||||||
|
match self.state {
|
||||||
|
StreamState::Buffering => {
|
||||||
|
self.text.push_str(token_text);
|
||||||
|
// We have a special match for `no_tool` in order to capture directly the `content`
|
||||||
|
// key which should be re-emitted as raw text.
|
||||||
|
if let Ok(value) = serde_json::from_str::<NoTool>(&format!("{}\"}}}}", self.text)) {
|
||||||
|
self.state = StreamState::NoTool;
|
||||||
|
// Modify the content of the token to be whatever was captured by the JSON
|
||||||
|
stream_token.token.text = value.function.content;
|
||||||
|
let chat_complete = create_event_from_stream_token(
|
||||||
|
&stream_token,
|
||||||
|
self.logprobs,
|
||||||
|
false,
|
||||||
|
self.fingerprint.clone(),
|
||||||
|
self.model_id.clone(),
|
||||||
|
None,
|
||||||
|
self.id.clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
events.push(chat_complete);
|
||||||
|
}
|
||||||
|
// XXX Caution, here we do not postfix the quote, so that the current output
|
||||||
|
// Is necessarily finished with quotes for us to be able to parse.
|
||||||
|
let partial = &self.text;
|
||||||
|
let partial = partial.trim_end_matches(|c: char| c.is_whitespace() || c == ',');
|
||||||
|
if let Ok(call) = serde_json::from_str::<Call>(&format!("{}}}}}", partial)) {
|
||||||
|
// This can be no_tool before the content has been emitted
|
||||||
|
if call.function._name != "no_tool" {
|
||||||
|
stream_token.token.text = "{".to_string();
|
||||||
|
let chat_complete = create_event_from_stream_token(
|
||||||
|
&stream_token,
|
||||||
|
self.logprobs,
|
||||||
|
true,
|
||||||
|
self.fingerprint.clone(),
|
||||||
|
self.model_id.clone(),
|
||||||
|
Some(call.function._name),
|
||||||
|
self.id.clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
events.push(chat_complete);
|
||||||
|
self.state = StreamState::Tool;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
StreamState::Tool => {
|
||||||
|
self.text.push_str(token_text);
|
||||||
|
if serde_json::from_str::<Call>(&self.text).is_ok() {
|
||||||
|
self.state = StreamState::Buffering;
|
||||||
|
let mut text = stream_token.token.text.trim_end();
|
||||||
|
// Effectively trimming only the last closing brace
|
||||||
|
if text.ends_with('}') {
|
||||||
|
text = &text[..text.len() - 1];
|
||||||
|
}
|
||||||
|
stream_token.token.text = text.to_string();
|
||||||
|
let chat_complete = create_event_from_stream_token(
|
||||||
|
&stream_token,
|
||||||
|
self.logprobs,
|
||||||
|
true,
|
||||||
|
self.fingerprint.clone(),
|
||||||
|
self.model_id.clone(),
|
||||||
|
None,
|
||||||
|
self.id.clone(),
|
||||||
|
);
|
||||||
|
events.push(chat_complete);
|
||||||
|
} else {
|
||||||
|
let chat_complete = create_event_from_stream_token(
|
||||||
|
&stream_token,
|
||||||
|
self.logprobs,
|
||||||
|
true,
|
||||||
|
self.fingerprint.clone(),
|
||||||
|
self.model_id.clone(),
|
||||||
|
None,
|
||||||
|
self.id.clone(),
|
||||||
|
);
|
||||||
|
events.push(chat_complete);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if we skipped sending the buffer we need to avoid sending the following json key and quotes
|
||||||
|
// We have remainder tokens, ignore everything.
|
||||||
|
StreamState::NoToolFinish => {}
|
||||||
|
StreamState::NoTool => {
|
||||||
|
self.text.push_str(token_text);
|
||||||
|
if token_text.contains("\"") {
|
||||||
|
let mut text = self
|
||||||
|
.text
|
||||||
|
.trim_end_matches(|c: char| c.is_whitespace() || c == '}');
|
||||||
|
// Trim once
|
||||||
|
if text.ends_with("\"") {
|
||||||
|
// Verify we have actually trimmed something
|
||||||
|
// The opposite can happen if the model is outputting inline JSON.
|
||||||
|
text = &text[..text.len() - 1];
|
||||||
|
if let Ok(_value) =
|
||||||
|
serde_json::from_str::<NoTool>(&format!("{}\"}}}}", text))
|
||||||
|
{
|
||||||
|
let mut text = token_text
|
||||||
|
.trim_end_matches(|c: char| c.is_whitespace() || c == '}');
|
||||||
|
// Effectively trim_end_match('"', 1)
|
||||||
|
// because we do not want to eventually trim finishing escaped quotes
|
||||||
|
// {{"\"Something\""}}
|
||||||
|
if text.ends_with("\"") {
|
||||||
|
text = &text[..text.len() - 1];
|
||||||
|
}
|
||||||
|
stream_token.token.text = text.to_string();
|
||||||
|
self.state = StreamState::NoToolFinish;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// This escaping is usually inline json escaping and we can therefore remove it.
|
||||||
|
stream_token.token.text = stream_token.token.text.replace("\\", "");
|
||||||
|
let chat_complete = create_event_from_stream_token(
|
||||||
|
&stream_token,
|
||||||
|
self.logprobs,
|
||||||
|
false,
|
||||||
|
self.fingerprint.clone(),
|
||||||
|
self.model_id.clone(),
|
||||||
|
None,
|
||||||
|
self.id.clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
events.push(chat_complete);
|
||||||
|
}
|
||||||
|
StreamState::Content => {
|
||||||
|
let chat_complete = create_event_from_stream_token(
|
||||||
|
&stream_token,
|
||||||
|
self.logprobs,
|
||||||
|
false,
|
||||||
|
self.fingerprint.clone(),
|
||||||
|
self.model_id.clone(),
|
||||||
|
None,
|
||||||
|
self.id.clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
events.push(chat_complete);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.options.include_usage {
|
||||||
|
if let Some(details) = stream_token.details {
|
||||||
|
let completion_tokens = details.generated_tokens;
|
||||||
|
let prompt_tokens = details.input_length;
|
||||||
|
let total_tokens = prompt_tokens + completion_tokens;
|
||||||
|
|
||||||
|
let usage = Usage {
|
||||||
|
completion_tokens,
|
||||||
|
prompt_tokens,
|
||||||
|
total_tokens,
|
||||||
|
};
|
||||||
|
let current_time = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
|
||||||
|
.as_secs();
|
||||||
|
|
||||||
|
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
|
||||||
|
id: String::new(),
|
||||||
|
created: current_time,
|
||||||
|
model: self.model_id.clone(),
|
||||||
|
system_fingerprint: self.fingerprint.clone(),
|
||||||
|
choices: vec![],
|
||||||
|
usage: Some(Usage {
|
||||||
|
prompt_tokens: usage.prompt_tokens,
|
||||||
|
completion_tokens: usage.completion_tokens,
|
||||||
|
total_tokens: usage.total_tokens,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
events.push(chat_complete);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
events
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::{
|
||||||
|
ChatCompletionChoice, ChatCompletionDelta, FinishReason, StreamDetails, TextMessage, Token,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn get_text_content(event: &CompletionType) -> &String {
|
||||||
|
match event {
|
||||||
|
CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
|
||||||
|
assert_eq!(choices.len(), 1);
|
||||||
|
if let ChatCompletionChoice {
|
||||||
|
delta: ChatCompletionDelta::Chat(TextMessage { content, .. }),
|
||||||
|
..
|
||||||
|
} = &choices[0]
|
||||||
|
{
|
||||||
|
content
|
||||||
|
} else {
|
||||||
|
panic!("Expected plain message");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => panic!("Unexpected chunk"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_tool_call_content(event: &CompletionType) -> (Option<&String>, &String) {
|
||||||
|
match event {
|
||||||
|
CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) => {
|
||||||
|
assert_eq!(choices.len(), 1);
|
||||||
|
if let ChatCompletionChoice {
|
||||||
|
delta: ChatCompletionDelta::Tool(ToolCallDelta { tool_calls, .. }),
|
||||||
|
..
|
||||||
|
} = &choices[0]
|
||||||
|
{
|
||||||
|
assert_eq!(tool_calls.len(), 1);
|
||||||
|
let DeltaToolCall {
|
||||||
|
index,
|
||||||
|
id,
|
||||||
|
r#type,
|
||||||
|
function,
|
||||||
|
} = &tool_calls[0];
|
||||||
|
assert_eq!(*index, 0);
|
||||||
|
assert_eq!(id, "0");
|
||||||
|
assert_eq!(r#type, "function");
|
||||||
|
(function.name.as_ref(), &function.arguments)
|
||||||
|
} else {
|
||||||
|
panic!("Expected plain message");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => panic!("Unexpected chunk"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
/// A single plain token pushed through a `ChatState` built with usage
/// reporting disabled must yield exactly one chat chunk carrying that text
/// and no finish reason.
fn test_chat_stream() {
    let options = StreamOptions {
        include_usage: false,
    };
    let mut state = ChatState::new(
        false,
        options,
        "fingerprint".to_string(),
        "model_id".to_string(),
        false,
        "0".to_string(),
    );

    let response = StreamResponse {
        generated_text: None,
        token: Token {
            id: 42,
            text: "Hi".to_string(),
            logprob: 0.0,
            special: false,
        },
        top_tokens: vec![],
        index: 0,
        details: None,
    };
    let events = state.push(response);
    assert_eq!(events.len(), 1);

    let expected = [ChatCompletionChoice {
        index: 0,
        delta: ChatCompletionDelta::Chat(TextMessage {
            role: "assistant".to_string(),
            content: "Hi".to_string(),
            tool_call_id: None,
        }),
        logprobs: None,
        finish_reason: None,
    }];
    if let CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) = &events[0] {
        assert_eq!(choices, &expected);
    } else {
        panic!("Unexpected chunk");
    }
}
|
||||||
|
|
||||||
|
#[test]
/// With `include_usage` enabled, a token that carries stream details must
/// produce two events: the chat chunk (now with a finish reason) followed by
/// a chunk holding the usage statistics.
fn test_chat_stream_usage() {
    let options = StreamOptions {
        include_usage: true,
    };
    let mut state = ChatState::new(
        false,
        options,
        "fingerprint".to_string(),
        "model_id".to_string(),
        false,
        "0".to_string(),
    );

    let final_response = StreamResponse {
        generated_text: None,
        token: Token {
            id: 42,
            text: "Hi".to_string(),
            logprob: 0.0,
            special: false,
        },
        top_tokens: vec![],
        index: 0,
        details: Some(StreamDetails {
            input_length: 2,
            generated_tokens: 10,
            seed: None,
            finish_reason: FinishReason::Length,
        }),
    };
    let events = state.push(final_response);
    assert_eq!(events.len(), 2);

    // First event: the regular chat chunk, which HAS A FINISH REASON.
    let expected_choices = [ChatCompletionChoice {
        index: 0,
        delta: ChatCompletionDelta::Chat(TextMessage {
            role: "assistant".to_string(),
            content: "Hi".to_string(),
            tool_call_id: None,
        }),
        logprobs: None,
        finish_reason: Some("length".to_string()),
    }];
    if let CompletionType::ChatCompletionChunk(ChatCompletionChunk { choices, .. }) = &events[0] {
        assert_eq!(choices, &expected_choices);
    } else {
        panic!("Unexpected chunk");
    }

    // Second event: usage, where total = prompt (2) + completion (10).
    let expected_usage = Some(Usage {
        prompt_tokens: 2,
        completion_tokens: 10,
        total_tokens: 12,
    });
    if let CompletionType::ChatCompletionChunk(ChatCompletionChunk { usage, .. }) = &events[1] {
        assert_eq!(*usage, expected_usage);
    } else {
        panic!("Unexpected chunk");
    }
}
|
||||||
|
|
||||||
|
#[test]
/// Streams `{"function": {"_name": "no_tool", "content": "I am a helpful assistant!"}}`
/// token by token and checks that only the content is surfaced, as plain
/// chat text, without its closing quote.
fn test_chat_stream_tool_no_tool() {
    let mut state = ChatState::new(
        true,
        StreamOptions {
            include_usage: true,
        },
        "fingerprint".to_string(),
        "model_id".to_string(),
        false,
        "0".to_string(),
    );

    let pieces = [
        // Indices 0..14: JSON preamble up to and including the `"content":` key.
        "{\"",
        "function",
        "\":",
        " {\"",
        "_",
        "name",
        "\":",
        " \"",
        "no",
        "_tool",
        "\",",
        " \"",
        "content",
        "\":",
        // Indices 14..21: the content string, starting with its opening quote.
        " \"",
        "I",
        " am",
        " a",
        " helpful",
        " assistant",
        "!\"", // trailing quote is expected to be removed
        // Indices 21..: closing braces.
        "}",
        "}",
    ];
    let responses: Vec<StreamResponse> = pieces
        .iter()
        .map(|piece| StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: piece.to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details: None,
        })
        .collect();
    let (preamble, remainder) = responses.split_at(14);
    let (content, trailer) = remainder.split_at(7);

    // The preamble must not emit anything.
    for response in preamble {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 0);
    }

    // Every content token must emit exactly one plain chat event.
    let mut output = String::new();
    for response in content {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 1);
        output.push_str(get_text_content(&events[0]));
    }
    assert_eq!(output, "I am a helpful assistant!");

    // The closing braces must not emit anything.
    for response in trailer {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 0);
    }
}
|
||||||
|
|
||||||
|
#[test]
/// Same scenario as `test_chat_stream_tool_no_tool`, except the content ends
/// with an escaped quote (`!\"`) right before the closing quote: only the
/// closing quote may be dropped, the escaped one must survive (unescaped) in
/// the output.
fn test_chat_stream_tool_no_tool_many_quotes() {
    let mut chat_state = ChatState::new(
        true,
        StreamOptions {
            include_usage: true,
        },
        "fingerprint".to_string(),
        "model_id".to_string(),
        false,
        "0".to_string(),
    );

    let tokens = vec![
        "{\"".to_string(),
        "function".to_string(),
        "\":".to_string(),
        " {\"".to_string(),
        "_".to_string(),
        "name".to_string(),
        "\":".to_string(),
        " \"".to_string(),
        "no".to_string(),
        "_tool".to_string(),
        "\",".to_string(),
        " \"".to_string(),
        "content".to_string(),
        "\":".to_string(),
        " \"".to_string(),        // index 14: opening quote of the content
        "I".to_string(),
        " am".to_string(),
        " a".to_string(),
        " helpful".to_string(),
        " assistant".to_string(),
        "!\\\"\"".to_string(), // escaped quote inside the string, then the closing quote
        "}".to_string(),
        "}".to_string(),
    ];
    // Build the stream responses once, like the sibling tests do, instead of
    // repeating the struct literal in every loop.
    let tokens: Vec<_> = tokens
        .into_iter()
        .map(|text| StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: text.to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details: None,
        })
        .collect();

    // Initial ignored output
    for token in &tokens[..14] {
        let events = chat_state.push(token.clone());
        assert_eq!(events.len(), 0);
    }

    // No tool output: one plain chat event per content token
    let mut output = String::new();
    for token in &tokens[14..14 + 7] {
        let events = chat_state.push(token.clone());
        assert_eq!(events.len(), 1);
        let content = get_text_content(&events[0]);
        output.push_str(content);
    }

    assert_eq!(output, "I am a helpful assistant!\"");

    // No tool finish
    for token in &tokens[14 + 7..] {
        let events = chat_state.push(token.clone());
        assert_eq!(events.len(), 0);
    }
}
|
||||||
|
|
||||||
|
#[test]
/// Streams a `no_tool` answer whose content is itself JSON with escaped
/// quotes (`{\"a\":2,\"b\": 1}`) and checks that the emitted text is that
/// JSON with the escaping backslashes removed.
fn test_chat_stream_tool_no_tool_inline_json() {
    let mut state = ChatState::new(
        true,
        StreamOptions {
            include_usage: true,
        },
        "fingerprint".to_string(),
        "model_id".to_string(),
        false,
        "0".to_string(),
    );

    let pieces = [
        // Indices 0..14: JSON preamble up to and including the `"content":` key.
        "{\"",
        "function",
        "\":",
        " {\"",
        "_",
        "name",
        "\":",
        " \"",
        "no",
        "_tool",
        "\",",
        " \"",
        "content",
        "\":",
        // Indices 14..26: the content string; each token yields one event.
        " \"",
        "{\\\"",
        "a",
        "\\\":",
        "2",
        ",\\",
        "\"",
        "b",
        "\\\": ",
        "1",
        "}",
        "\"}", // closing quote of the content plus the first closing brace
        // Indices 26..: final closing brace.
        "}",
    ];
    let responses: Vec<StreamResponse> = pieces
        .iter()
        .map(|piece| StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: piece.to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details: None,
        })
        .collect();
    let (preamble, remainder) = responses.split_at(14);
    let (content, trailer) = remainder.split_at(12);

    // The preamble must not emit anything.
    for response in preamble {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 0);
    }

    // Every content token must emit exactly one plain chat event.
    let mut output = String::new();
    for response in content {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 1, "Current text is {output:?}");
        output.push_str(get_text_content(&events[0]));
    }
    assert_eq!(output, "{\"a\":2,\"b\": 1}");

    // Nothing may be emitted once the content is complete.
    for response in trailer {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 0, "Extra events {events:?}");
    }
}
|
||||||
|
|
||||||
|
#[test]
/// Streams a `no_tool` answer whose content is the empty string and checks
/// that the events produced while the content is open add up to empty text.
fn test_chat_stream_tool_no_tool_empty() {
    let mut state = ChatState::new(
        true,
        StreamOptions {
            include_usage: true,
        },
        "fingerprint".to_string(),
        "model_id".to_string(),
        false,
        "0".to_string(),
    );

    let pieces = [
        // Indices 0..13: JSON preamble up to the `content` key name.
        "{\"",
        "function",
        "\":",
        " {\"",
        "_",
        "name",
        "\":",
        " \"",
        "no",
        "_tool",
        "\",",
        " \"",
        "content",
        // Indices 13..15: opening and closing of the (empty) content string;
        // each of these two tokens yields one event.
        "\":\"",
        "\"}",
        // Indices 15..: final closing brace.
        "}",
    ];
    let responses: Vec<StreamResponse> = pieces
        .iter()
        .map(|piece| StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: piece.to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details: None,
        })
        .collect();
    let (preamble, remainder) = responses.split_at(13);
    let (content, trailer) = remainder.split_at(2);

    // The preamble must not emit anything.
    for response in preamble {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 0);
    }

    // Both content tokens emit one event each, carrying no visible text.
    let mut output = String::new();
    for response in content {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 1, "Current text is {output:?}");
        output.push_str(get_text_content(&events[0]));
    }
    assert_eq!(output, "");

    // Nothing may be emitted once the content is complete.
    for response in trailer {
        let events = state.push(response.clone());
        assert_eq!(events.len(), 0, "Extra events {events:?}");
    }
}
|
||||||
|
|
||||||
|
#[test]
/// Streams a real tool call (`get_current_weather`) token by token and checks
/// that the function name is reported and that the concatenated argument
/// fragments form the arguments object, without the outer closing brace.
fn test_chat_stream_tool_get_weather() {
    let mut chat_state = ChatState::new(
        true,
        StreamOptions {
            include_usage: true,
        },
        "fingerprint".to_string(),
        "model_id".to_string(),
        false,
        "0".to_string(),
    );

    let tokens = vec![
        // Indices 0..11: buffered while the function name is being read.
        "{\"".to_string(),
        "function".to_string(),
        "\":".to_string(),
        " {\"".to_string(),
        "_".to_string(),
        "name".to_string(),
        "\":".to_string(),
        " \"".to_string(),
        "get".to_string(),
        "_current".to_string(),
        "_weather".to_string(),
        // Indices 11..28: one event per token. The expected output below
        // starts with "{", so the first of these events opens the arguments.
        "\",".to_string(),
        " \"".to_string(),
        "location".to_string(),
        "\":".to_string(),
        " \"".to_string(),
        "San".to_string(),
        " Francisco".to_string(),
        ",".to_string(),
        " CA".to_string(),
        "\",".to_string(),
        " \"".to_string(),
        "format".to_string(),
        "\":".to_string(),
        " \"".to_string(),
        "c".to_string(),
        "elsius".to_string(),
        "\"}}".to_string(), // retained, with the last closing brace removed
    ];
    let tokens: Vec<_> = tokens
        .into_iter()
        .map(|text| StreamResponse {
            generated_text: None,
            token: Token {
                id: 42,
                text: text.to_string(),
                logprob: 0.0,
                special: false,
            },
            top_tokens: vec![],
            index: 0,
            details: None,
        })
        .collect();

    // Initial ignored output
    for token in &tokens[..11] {
        let events = chat_state.push(token.clone());
        assert_eq!(events.len(), 0, "{events:?}");
    }

    // Tool call output: exactly one tool-call delta per token
    let mut output = String::new();
    let mut output_name = String::new();
    for token in &tokens[11..11 + 17] {
        let events = chat_state.push(token.clone());
        assert_eq!(events.len(), 1);
        let (name, arguments) = get_tool_call_content(&events[0]);
        if let Some(name) = name {
            assert_eq!(name, "get_current_weather");
            output_name.push_str(name);
        }
        output.push_str(arguments);
    }

    assert_eq!(output_name, "get_current_weather");
    assert_eq!(
        output,
        "{ \"location\": \"San Francisco, CA\", \"format\": \"celsius\"}"
    );

    // Nothing is left after the tool call (this slice is empty for the
    // token list above); any further token must stay silent.
    for token in &tokens[11 + 17..] {
        let events = chat_state.push(token.clone());
        assert_eq!(events.len(), 0);
    }
}
|
||||||
|
}
|
@ -16,7 +16,7 @@ pub(crate) fn strftime_now(format_str: String) -> Result<String, minijinja::Erro
|
|||||||
Ok(Local::now().format(&format_str).to_string())
|
Ok(Local::now().format(&format_str).to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub(crate) struct ChatTemplate {
|
pub(crate) struct ChatTemplate {
|
||||||
template: Template<'static, 'static>,
|
template: Template<'static, 'static>,
|
||||||
bos_token: Option<String>,
|
bos_token: Option<String>,
|
||||||
|
@ -52,7 +52,7 @@ pub struct Infer {
|
|||||||
/// Request backend
|
/// Request backend
|
||||||
backend: Arc<dyn Backend + Send + Sync>,
|
backend: Arc<dyn Backend + Send + Sync>,
|
||||||
/// Chat template
|
/// Chat template
|
||||||
chat_template: Option<ChatTemplate>,
|
pub(crate) chat_template: Option<ChatTemplate>,
|
||||||
/// Inference limit
|
/// Inference limit
|
||||||
limit_concurrent_requests: Arc<Semaphore>,
|
limit_concurrent_requests: Arc<Semaphore>,
|
||||||
/// Backend health
|
/// Backend health
|
||||||
|
@ -8,6 +8,7 @@ pub mod validation;
|
|||||||
mod kserve;
|
mod kserve;
|
||||||
pub mod logging;
|
pub mod logging;
|
||||||
|
|
||||||
|
mod chat;
|
||||||
mod sagemaker;
|
mod sagemaker;
|
||||||
pub mod usage_stats;
|
pub mod usage_stats;
|
||||||
mod vertex;
|
mod vertex;
|
||||||
@ -20,6 +21,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use tokenizers::Encoding;
|
use tokenizers::Encoding;
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
|
use uuid::Uuid;
|
||||||
use validation::Validation;
|
use validation::Validation;
|
||||||
|
|
||||||
#[allow(clippy::large_enum_variant)]
|
#[allow(clippy::large_enum_variant)]
|
||||||
@ -541,6 +543,7 @@ pub(crate) struct Chunk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
||||||
|
#[cfg_attr(test, derive(Debug))]
|
||||||
pub(crate) struct ChatCompletion {
|
pub(crate) struct ChatCompletion {
|
||||||
pub id: String,
|
pub id: String,
|
||||||
#[schema(example = "1706270835")]
|
#[schema(example = "1706270835")]
|
||||||
@ -553,6 +556,7 @@ pub(crate) struct ChatCompletion {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
||||||
|
#[cfg_attr(test, derive(Debug))]
|
||||||
pub(crate) struct ChatCompletionComplete {
|
pub(crate) struct ChatCompletionComplete {
|
||||||
pub index: u32,
|
pub index: u32,
|
||||||
pub message: OutputMessage,
|
pub message: OutputMessage,
|
||||||
@ -561,6 +565,7 @@ pub(crate) struct ChatCompletionComplete {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
||||||
|
#[cfg_attr(test, derive(Debug, PartialEq))]
|
||||||
pub(crate) struct ChatCompletionLogprobs {
|
pub(crate) struct ChatCompletionLogprobs {
|
||||||
content: Vec<ChatCompletionLogprob>,
|
content: Vec<ChatCompletionLogprob>,
|
||||||
}
|
}
|
||||||
@ -619,6 +624,7 @@ impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
||||||
|
#[cfg_attr(test, derive(Debug, PartialEq))]
|
||||||
pub(crate) struct ChatCompletionLogprob {
|
pub(crate) struct ChatCompletionLogprob {
|
||||||
token: String,
|
token: String,
|
||||||
logprob: f32,
|
logprob: f32,
|
||||||
@ -626,12 +632,14 @@ pub(crate) struct ChatCompletionLogprob {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
#[derive(Clone, Deserialize, Serialize, ToSchema)]
|
||||||
|
#[cfg_attr(test, derive(Debug, PartialEq))]
|
||||||
pub(crate) struct ChatCompletionTopLogprob {
|
pub(crate) struct ChatCompletionTopLogprob {
|
||||||
token: String,
|
token: String,
|
||||||
logprob: f32,
|
logprob: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Serialize, ToSchema, Default)]
|
#[derive(Clone, Deserialize, Serialize, ToSchema, Default)]
|
||||||
|
#[cfg_attr(test, derive(Debug, PartialEq))]
|
||||||
pub(crate) struct Usage {
|
pub(crate) struct Usage {
|
||||||
pub prompt_tokens: u32,
|
pub prompt_tokens: u32,
|
||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
@ -640,6 +648,7 @@ pub(crate) struct Usage {
|
|||||||
|
|
||||||
#[derive(Clone, Serialize, ToSchema)]
|
#[derive(Clone, Serialize, ToSchema)]
|
||||||
#[serde(tag = "object")]
|
#[serde(tag = "object")]
|
||||||
|
#[cfg_attr(test, derive(Debug))]
|
||||||
enum CompletionType {
|
enum CompletionType {
|
||||||
#[serde(rename = "chat.completion.chunk")]
|
#[serde(rename = "chat.completion.chunk")]
|
||||||
ChatCompletionChunk(ChatCompletionChunk),
|
ChatCompletionChunk(ChatCompletionChunk),
|
||||||
@ -707,6 +716,7 @@ impl ChatCompletion {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[derive(Clone, Serialize, ToSchema)]
|
#[derive(Clone, Serialize, ToSchema)]
|
||||||
|
#[cfg_attr(test, derive(Debug))]
|
||||||
pub(crate) struct ChatCompletionChunk {
|
pub(crate) struct ChatCompletionChunk {
|
||||||
pub id: String,
|
pub id: String,
|
||||||
#[schema(example = "1706270978")]
|
#[schema(example = "1706270978")]
|
||||||
@ -719,6 +729,7 @@ pub(crate) struct ChatCompletionChunk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Serialize, ToSchema)]
|
#[derive(Clone, Serialize, ToSchema)]
|
||||||
|
#[cfg_attr(test, derive(Debug, PartialEq))]
|
||||||
pub(crate) struct ChatCompletionChoice {
|
pub(crate) struct ChatCompletionChoice {
|
||||||
pub index: u32,
|
pub index: u32,
|
||||||
pub delta: ChatCompletionDelta,
|
pub delta: ChatCompletionDelta,
|
||||||
@ -735,6 +746,7 @@ pub struct ToolCallDelta {
|
|||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, ToSchema)]
|
#[derive(Clone, Debug, Serialize, ToSchema)]
|
||||||
#[serde(untagged)]
|
#[serde(untagged)]
|
||||||
|
#[cfg_attr(test, derive(PartialEq))]
|
||||||
enum ChatCompletionDelta {
|
enum ChatCompletionDelta {
|
||||||
Chat(TextMessage),
|
Chat(TextMessage),
|
||||||
Tool(ToolCallDelta),
|
Tool(ToolCallDelta),
|
||||||
@ -759,48 +771,17 @@ impl ChatCompletionChunk {
|
|||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
model: String,
|
model: String,
|
||||||
system_fingerprint: String,
|
system_fingerprint: String,
|
||||||
delta: Option<String>,
|
|
||||||
tool_calls: Option<Vec<String>>,
|
|
||||||
created: u64,
|
created: u64,
|
||||||
logprobs: Option<ChatCompletionLogprobs>,
|
choices: Vec<ChatCompletionChoice>,
|
||||||
finish_reason: Option<String>,
|
usage: Option<Usage>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let delta = match (delta, tool_calls) {
|
|
||||||
(Some(delta), _) => ChatCompletionDelta::Chat(TextMessage {
|
|
||||||
role: "assistant".to_string(),
|
|
||||||
content: delta,
|
|
||||||
..Default::default()
|
|
||||||
}),
|
|
||||||
(None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta {
|
|
||||||
role: "assistant".to_string(),
|
|
||||||
tool_calls: vec![DeltaToolCall {
|
|
||||||
index: 0,
|
|
||||||
id: String::new(),
|
|
||||||
r#type: "function".to_string(),
|
|
||||||
function: Function {
|
|
||||||
name: None,
|
|
||||||
arguments: tool_calls[0].to_string(),
|
|
||||||
},
|
|
||||||
}],
|
|
||||||
}),
|
|
||||||
(None, None) => ChatCompletionDelta::Chat(TextMessage {
|
|
||||||
role: "assistant".to_string(),
|
|
||||||
content: "".to_string(),
|
|
||||||
..Default::default()
|
|
||||||
}),
|
|
||||||
};
|
|
||||||
Self {
|
Self {
|
||||||
id: String::new(),
|
id: String::new(),
|
||||||
created,
|
created,
|
||||||
model,
|
model,
|
||||||
system_fingerprint,
|
system_fingerprint,
|
||||||
choices: vec![ChatCompletionChoice {
|
choices,
|
||||||
index: 0,
|
usage,
|
||||||
delta,
|
|
||||||
logprobs,
|
|
||||||
finish_reason,
|
|
||||||
}],
|
|
||||||
usage: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -915,7 +896,7 @@ pub(crate) struct ChatRequest {
|
|||||||
/// Options for streaming response. Only set this when you set stream: true.
|
/// Options for streaming response. Only set this when you set stream: true.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
#[schema(nullable = true, example = "null")]
|
#[schema(nullable = true, example = "null")]
|
||||||
pub stream_options: Option<StreamOptions>,
|
pub stream_options: StreamOptions,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ChatRequest {
|
impl ChatRequest {
|
||||||
@ -1015,13 +996,37 @@ impl ChatRequest {
|
|||||||
using_tools,
|
using_tools,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn next_int_id(&self) -> Result<String, Box<dyn std::error::Error>> {
|
||||||
|
let mut id: usize = 0;
|
||||||
|
for message in &self.messages {
|
||||||
|
if let MessageBody::Tool { tool_calls } = &message.body {
|
||||||
|
for tool_call in tool_calls {
|
||||||
|
let new_id: usize = tool_call.id.parse()?;
|
||||||
|
id = std::cmp::max(id, new_id + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(id.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try to have linearly increasing id
|
||||||
|
/// or resort to using Uuid if the initial
|
||||||
|
/// scheme is not understood
|
||||||
|
fn next_tool_call_id(&self) -> String {
|
||||||
|
self.next_int_id().unwrap_or_else(|_| {
|
||||||
|
let uid = Uuid::new_v4().to_string();
|
||||||
|
uid.to_string()
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, ToSchema, Serialize)]
|
#[derive(Clone, Deserialize, ToSchema, Serialize, Default)]
|
||||||
#[cfg_attr(test, derive(Debug, PartialEq))]
|
#[cfg_attr(test, derive(Debug, PartialEq))]
|
||||||
struct StreamOptions {
|
struct StreamOptions {
|
||||||
/// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
|
/// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
|
||||||
#[schema(example = "true")]
|
#[schema(example = "true")]
|
||||||
|
#[serde(default)]
|
||||||
include_usage: bool,
|
include_usage: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1445,7 +1450,7 @@ pub(crate) struct ChatTokenizeResponse {
|
|||||||
#[serde(transparent)]
|
#[serde(transparent)]
|
||||||
pub(crate) struct TokenizeResponse(Vec<SimpleToken>);
|
pub(crate) struct TokenizeResponse(Vec<SimpleToken>);
|
||||||
|
|
||||||
#[derive(Serialize, ToSchema)]
|
#[derive(Serialize, ToSchema, Clone)]
|
||||||
pub(crate) struct StreamDetails {
|
pub(crate) struct StreamDetails {
|
||||||
#[schema(example = "length")]
|
#[schema(example = "length")]
|
||||||
pub finish_reason: FinishReason,
|
pub finish_reason: FinishReason,
|
||||||
@ -1457,7 +1462,7 @@ pub(crate) struct StreamDetails {
|
|||||||
pub input_length: u32,
|
pub input_length: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, ToSchema)]
|
#[derive(Serialize, ToSchema, Clone)]
|
||||||
pub(crate) struct StreamResponse {
|
pub(crate) struct StreamResponse {
|
||||||
pub index: u32,
|
pub index: u32,
|
||||||
pub token: Token,
|
pub token: Token,
|
||||||
@ -1700,9 +1705,25 @@ mod tests {
|
|||||||
|
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
request.stream_options,
|
request.stream_options,
|
||||||
Some(StreamOptions {
|
StreamOptions {
|
||||||
include_usage: true
|
include_usage: true
|
||||||
})
|
}
|
||||||
|
));
|
||||||
|
|
||||||
|
let json = json!({
|
||||||
|
"model": "",
|
||||||
|
"messages": [{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello"
|
||||||
|
}]
|
||||||
|
});
|
||||||
|
let request: ChatRequest = serde_json::from_str(json.to_string().as_str()).unwrap();
|
||||||
|
|
||||||
|
assert!(matches!(
|
||||||
|
request.stream_options,
|
||||||
|
StreamOptions {
|
||||||
|
include_usage: false
|
||||||
|
}
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use crate::chat::ChatState;
|
||||||
/// HTTP Server logic
|
/// HTTP Server logic
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::infer::{Backend, Infer, InferError, InferResponse, InferStreamResponse};
|
use crate::infer::{Backend, Infer, InferError, InferResponse, InferStreamResponse};
|
||||||
@ -47,8 +48,6 @@ use http::header::AUTHORIZATION;
|
|||||||
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
|
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
|
||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
use pyo3::types::IntoPyDict;
|
use pyo3::types::IntoPyDict;
|
||||||
use regex::Regex;
|
|
||||||
use serde_json::Value;
|
|
||||||
use std::convert::Infallible;
|
use std::convert::Infallible;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
@ -1114,62 +1113,6 @@ pub(crate) async fn completions(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum StreamState {
|
|
||||||
Buffering,
|
|
||||||
BufferTrailing,
|
|
||||||
Content { skip_close_quote: bool },
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert a StreamResponse into an Event to be sent over SSE
|
|
||||||
fn create_event_from_stream_token(
|
|
||||||
stream_token: &StreamResponse,
|
|
||||||
logprobs: bool,
|
|
||||||
inner_using_tools: bool,
|
|
||||||
system_fingerprint: String,
|
|
||||||
model_id: String,
|
|
||||||
) -> Event {
|
|
||||||
let event = Event::default();
|
|
||||||
let current_time = std::time::SystemTime::now()
|
|
||||||
.duration_since(std::time::UNIX_EPOCH)
|
|
||||||
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
|
|
||||||
.as_secs();
|
|
||||||
|
|
||||||
let logprobs = logprobs.then(|| {
|
|
||||||
ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens.clone()))
|
|
||||||
});
|
|
||||||
|
|
||||||
// replace the content with the tool calls if grammar is present
|
|
||||||
let (content, tool_calls) = if inner_using_tools {
|
|
||||||
(None, Some(vec![stream_token.token.text.clone()]))
|
|
||||||
} else {
|
|
||||||
let content = if !stream_token.token.special {
|
|
||||||
Some(stream_token.token.text.clone())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
(content, None)
|
|
||||||
};
|
|
||||||
let finish_reason = stream_token
|
|
||||||
.details
|
|
||||||
.as_ref()
|
|
||||||
.map(|details| details.finish_reason.format(true));
|
|
||||||
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
|
|
||||||
model_id.clone(),
|
|
||||||
system_fingerprint.clone(),
|
|
||||||
content,
|
|
||||||
tool_calls,
|
|
||||||
current_time,
|
|
||||||
logprobs,
|
|
||||||
finish_reason,
|
|
||||||
));
|
|
||||||
|
|
||||||
event.json_data(chat_complete).unwrap_or_else(|e| {
|
|
||||||
println!("Failed to serialize ChatCompletionChunk: {:?}", e);
|
|
||||||
Event::default()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate tokens
|
/// Generate tokens
|
||||||
#[utoipa::path(
|
#[utoipa::path(
|
||||||
post,
|
post,
|
||||||
@ -1219,6 +1162,9 @@ pub(crate) async fn chat_completions(
|
|||||||
logprobs,
|
logprobs,
|
||||||
..
|
..
|
||||||
} = chat.clone();
|
} = chat.clone();
|
||||||
|
|
||||||
|
tracing::debug!("Got chat_template {:?}", infer.chat_template);
|
||||||
|
let id = chat.next_tool_call_id();
|
||||||
let (generate_request, using_tools): (GenerateRequest, bool) =
|
let (generate_request, using_tools): (GenerateRequest, bool) =
|
||||||
chat.try_into_generate(&infer)?;
|
chat.try_into_generate(&infer)?;
|
||||||
span.record("parameters", format!("{:?}", generate_request.parameters));
|
span.record("parameters", format!("{:?}", generate_request.parameters));
|
||||||
@ -1235,165 +1181,19 @@ pub(crate) async fn chat_completions(
|
|||||||
let (headers, response_stream) =
|
let (headers, response_stream) =
|
||||||
generate_stream_internal(infer, compute_type, Json(generate_request), span).await;
|
generate_stream_internal(infer, compute_type, Json(generate_request), span).await;
|
||||||
|
|
||||||
// regex to match any function name
|
|
||||||
let function_regex = match Regex::new(r#"\{"function":\{"_name":"([^"]+)""#) {
|
|
||||||
Ok(regex) => regex,
|
|
||||||
Err(e) => {
|
|
||||||
return Err((
|
|
||||||
StatusCode::INTERNAL_SERVER_ERROR,
|
|
||||||
Json(ErrorResponse {
|
|
||||||
error: format!("Failed to compile regex: {}", e),
|
|
||||||
error_type: "regex".to_string(),
|
|
||||||
}),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let response_stream = async_stream::stream! {
|
let response_stream = async_stream::stream! {
|
||||||
let mut response_stream = Box::pin(response_stream);
|
let mut response_stream = Box::pin(response_stream);
|
||||||
let mut buffer = Vec::new();
|
let mut state = ChatState::new(using_tools, stream_options, system_fingerprint, model_id, logprobs, id);
|
||||||
let mut json_buffer = String::new();
|
|
||||||
let mut state = if using_tools {
|
|
||||||
StreamState::Buffering
|
|
||||||
} else {
|
|
||||||
StreamState::Content {
|
|
||||||
skip_close_quote: false,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let mut response_as_tool = using_tools;
|
|
||||||
while let Some(result) = response_stream.next().await {
|
while let Some(result) = response_stream.next().await {
|
||||||
match result{
|
match result{
|
||||||
Ok(stream_token) => {
|
Ok(stream_token) => {
|
||||||
let token_text = &stream_token.token.text.clone();
|
let events = state.push(stream_token);
|
||||||
let usage = stream_token.details.as_ref().map(|details| {
|
for chat_complete in events{
|
||||||
let completion_tokens = details.generated_tokens;
|
yield Ok(Event::default().json_data(chat_complete).unwrap_or_else(|e| {
|
||||||
let prompt_tokens = details.input_length;
|
tracing::error!("Failed to serialize ChatCompletionChunk: {:?}", e);
|
||||||
let total_tokens = prompt_tokens + completion_tokens;
|
Event::default()
|
||||||
|
|
||||||
Usage {
|
|
||||||
completion_tokens,
|
|
||||||
prompt_tokens,
|
|
||||||
total_tokens,
|
|
||||||
}
|
|
||||||
});
|
|
||||||
match state {
|
|
||||||
StreamState::Buffering => {
|
|
||||||
json_buffer.push_str(&token_text.replace(" ", ""));
|
|
||||||
buffer.push(stream_token);
|
|
||||||
if let Some(captures) = function_regex.captures(&json_buffer) {
|
|
||||||
let function_name = captures[1].to_string();
|
|
||||||
if function_name == "no_tool" {
|
|
||||||
state = StreamState::BufferTrailing;
|
|
||||||
response_as_tool = false;
|
|
||||||
buffer.clear();
|
|
||||||
json_buffer.clear();
|
|
||||||
} else {
|
|
||||||
state = StreamState::Content {
|
|
||||||
skip_close_quote: false,
|
|
||||||
};
|
|
||||||
// send all the buffered messages
|
|
||||||
for stream_token in &buffer {
|
|
||||||
let event = create_event_from_stream_token(
|
|
||||||
stream_token,
|
|
||||||
logprobs,
|
|
||||||
response_as_tool,
|
|
||||||
system_fingerprint.clone(),
|
|
||||||
model_id.clone(),
|
|
||||||
);
|
|
||||||
yield Ok::<Event, Infallible>(event);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// if we skipped sending the buffer we need to avoid sending the following json key and quotes
|
|
||||||
StreamState::BufferTrailing => {
|
|
||||||
let infix_text = "\"content\":\"";
|
|
||||||
json_buffer.push_str(&token_text.replace(" ", ""));
|
|
||||||
// keep capturing until we find the infix text
|
|
||||||
match json_buffer.find(infix_text) {
|
|
||||||
Some(content_key_index) => {
|
|
||||||
json_buffer =
|
|
||||||
json_buffer[content_key_index + infix_text.len()..].to_string();
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// if there is leftover text after removing the infix text, we need to send it
|
|
||||||
if !json_buffer.is_empty() {
|
|
||||||
let event = Event::default();
|
|
||||||
let current_time = std::time::SystemTime::now()
|
|
||||||
.duration_since(std::time::UNIX_EPOCH)
|
|
||||||
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
|
|
||||||
.as_secs();
|
|
||||||
let chat_complete =
|
|
||||||
CompletionType::ChatCompletionChunk(ChatCompletionChunk::new(
|
|
||||||
model_id.clone(),
|
|
||||||
system_fingerprint.clone(),
|
|
||||||
Some(json_buffer.clone()),
|
|
||||||
None,
|
|
||||||
current_time,
|
|
||||||
None,
|
|
||||||
None,
|
|
||||||
));
|
|
||||||
yield Ok(event.json_data(chat_complete).unwrap_or_else(|e| {
|
|
||||||
InferError::StreamSerializationError(e.to_string()).into()
|
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
// cleanup the buffers
|
|
||||||
buffer.clear();
|
|
||||||
json_buffer.clear();
|
|
||||||
state = StreamState::Content {
|
|
||||||
skip_close_quote: true,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
StreamState::Content { skip_close_quote } => {
|
|
||||||
if skip_close_quote && token_text.contains('"') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// send the content
|
|
||||||
let event = create_event_from_stream_token(
|
|
||||||
&stream_token,
|
|
||||||
logprobs,
|
|
||||||
response_as_tool,
|
|
||||||
system_fingerprint.clone(),
|
|
||||||
model_id.clone(),
|
|
||||||
);
|
|
||||||
|
|
||||||
yield Ok::<Event, Infallible>(event);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let should_send_usage = usage.is_some()
|
|
||||||
&& stream_options
|
|
||||||
.as_ref()
|
|
||||||
.is_some_and(|opts| opts.include_usage);
|
|
||||||
|
|
||||||
if should_send_usage {
|
|
||||||
let usage_data = usage.unwrap();
|
|
||||||
let current_time = std::time::SystemTime::now()
|
|
||||||
.duration_since(std::time::UNIX_EPOCH)
|
|
||||||
.unwrap_or_else(|_| std::time::Duration::from_secs(0))
|
|
||||||
.as_secs();
|
|
||||||
|
|
||||||
let chat_complete = CompletionType::ChatCompletionChunk(ChatCompletionChunk {
|
|
||||||
id: String::new(),
|
|
||||||
created: current_time,
|
|
||||||
model: model_id.clone(),
|
|
||||||
system_fingerprint: system_fingerprint.clone(),
|
|
||||||
choices: vec![],
|
|
||||||
usage: Some(Usage {
|
|
||||||
prompt_tokens: usage_data.prompt_tokens,
|
|
||||||
completion_tokens: usage_data.completion_tokens,
|
|
||||||
total_tokens: usage_data.total_tokens,
|
|
||||||
}),
|
|
||||||
});
|
|
||||||
|
|
||||||
yield Ok(Event::default()
|
|
||||||
.json_data(chat_complete)
|
|
||||||
.unwrap_or_else(|e| InferError::StreamSerializationError(e.to_string()).into()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(err) => yield Ok(err.into_openai_event())
|
Err(err) => yield Ok(err.into_openai_event())
|
||||||
}
|
}
|
||||||
@ -1413,56 +1213,7 @@ pub(crate) async fn chat_completions(
|
|||||||
.as_secs();
|
.as_secs();
|
||||||
|
|
||||||
let (tool_calls, output) = if using_tools {
|
let (tool_calls, output) = if using_tools {
|
||||||
let gen_text_value: Value =
|
crate::chat::parse_output(&generation.generated_text)?
|
||||||
serde_json::from_str(&generation.generated_text).map_err(|e| {
|
|
||||||
InferError::ToolError(format!(
|
|
||||||
"Failed to parse generated text: {} {:?}",
|
|
||||||
e, generation.generated_text
|
|
||||||
))
|
|
||||||
})?;
|
|
||||||
let function = gen_text_value.get("function").ok_or(InferError::ToolError(
|
|
||||||
"No function found in generated text".to_string(),
|
|
||||||
))?;
|
|
||||||
|
|
||||||
let name = function
|
|
||||||
.get("_name")
|
|
||||||
.and_then(Value::as_str)
|
|
||||||
.ok_or(InferError::ToolError(
|
|
||||||
"No _name found in generated text".to_string(),
|
|
||||||
))?
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
let mut arguments = function.clone();
|
|
||||||
if let Value::Object(ref mut props) = arguments {
|
|
||||||
props.remove("_name");
|
|
||||||
}
|
|
||||||
match name.as_str() {
|
|
||||||
"no_tool" => {
|
|
||||||
// parse the content message
|
|
||||||
let content_message = arguments
|
|
||||||
.get("content")
|
|
||||||
.and_then(Value::as_str)
|
|
||||||
.ok_or_else(|| {
|
|
||||||
InferError::ToolError(
|
|
||||||
"No `content` found in generated text".to_string(),
|
|
||||||
)
|
|
||||||
})?
|
|
||||||
.to_string();
|
|
||||||
(None, Some(content_message))
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
let tool_calls = vec![ToolCall {
|
|
||||||
id: "0".to_string(),
|
|
||||||
r#type: "function".to_string(),
|
|
||||||
function: FunctionDefinition {
|
|
||||||
description: None,
|
|
||||||
name,
|
|
||||||
arguments,
|
|
||||||
},
|
|
||||||
}];
|
|
||||||
(Some(tool_calls), None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
(None, Some(generation.generated_text))
|
(None, Some(generation.generated_text))
|
||||||
};
|
};
|
||||||
@ -1817,6 +1568,7 @@ pub async fn run(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
Type::Cache(cache) => {
|
Type::Cache(cache) => {
|
||||||
|
tracing::info!("Cache {cache:?}");
|
||||||
let repo = cache.repo(Repo::with_revision(
|
let repo = cache.repo(Repo::with_revision(
|
||||||
tokenizer_name.to_string(),
|
tokenizer_name.to_string(),
|
||||||
RepoType::Model,
|
RepoType::Model,
|
||||||
@ -1833,6 +1585,7 @@ pub async fn run(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Read the JSON contents of the file as an instance of 'HubTokenizerConfig'.
|
// Read the JSON contents of the file as an instance of 'HubTokenizerConfig'.
|
||||||
|
tracing::warn!("Tokenizer_config {tokenizer_config_path:?} - {tokenizer_config_filename:?}");
|
||||||
let tokenizer_config: Option<HubTokenizerConfig> = if let Some(filename) = tokenizer_config_path
|
let tokenizer_config: Option<HubTokenizerConfig> = if let Some(filename) = tokenizer_config_path
|
||||||
{
|
{
|
||||||
HubTokenizerConfig::from_file(filename)
|
HubTokenizerConfig::from_file(filename)
|
||||||
|
@ -542,6 +542,7 @@ class Qwen2VLForConditionalGeneration(nn.Module):
|
|||||||
max_s=max_s,
|
max_s=max_s,
|
||||||
true_max_s=max_s,
|
true_max_s=max_s,
|
||||||
prefill_cache_indices=prefill_cache_indices,
|
prefill_cache_indices=prefill_cache_indices,
|
||||||
|
adapter_data=adapter_data,
|
||||||
)
|
)
|
||||||
if lm_head_indices is not None:
|
if lm_head_indices is not None:
|
||||||
hidden_states = hidden_states[lm_head_indices]
|
hidden_states = hidden_states[lm_head_indices]
|
||||||
|
Loading…
Reference in New Issue
Block a user