Making tool_calls a vector. (#3075)

* Making `tool_calls` a vector.

* Update doc.

* Fixing the nix overlay with updated version.

* Add openai dependency.

* Updating the old tests.

* Trying to reduce the logs in the case of errors.

* Less spammy logs too.
This commit is contained in:
Nicolas Patry 2025-03-05 22:32:31 +01:00 committed by GitHub
parent 3208d1cd1d
commit 8e92942a18
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 1158 additions and 79 deletions

View File

@ -311,7 +311,7 @@ impl State {
+ entry.request.stopping_parameters.max_new_tokens + entry.request.stopping_parameters.max_new_tokens
+ self.speculate + self.speculate
- 1; - 1;
tracing::debug!("Allocating {tokens} with {input_ids:?}"); // tracing::debug!("Allocating {tokens} with {input_ids:?}");
let block_allocation = match block_allocator.allocate(tokens, input_ids).await { let block_allocation = match block_allocator.allocate(tokens, input_ids).await {
None => { None => {
@ -322,7 +322,7 @@ impl State {
break 'entry_loop; break 'entry_loop;
} }
Some(mut block_allocation) => { Some(mut block_allocation) => {
tracing::debug!("Allocation: {block_allocation:?}"); // tracing::debug!("Allocation: {block_allocation:?}");
max_blocks = max(max_blocks, block_allocation.blocks.len() as u32); max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
if block_allocation.prefix_len == entry.request.input_length { if block_allocation.prefix_len == entry.request.input_length {

View File

@ -67,7 +67,7 @@ class ChoiceDeltaToolCall(BaseModel):
class ChoiceDelta(BaseModel): class ChoiceDelta(BaseModel):
role: str role: str
content: Optional[str] = None content: Optional[str] = None
tool_calls: Optional[ChoiceDeltaToolCall] = None tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
class Choice(BaseModel): class Choice(BaseModel):

View File

@ -2302,7 +2302,10 @@
"example": "assistant" "example": "assistant"
}, },
"tool_calls": { "tool_calls": {
"$ref": "#/components/schemas/DeltaToolCall" "type": "array",
"items": {
"$ref": "#/components/schemas/DeltaToolCall"
}
} }
} }
}, },

View File

@ -98,7 +98,7 @@ def pytest_collection_modifyitems(config, items):
selector(item) selector(item)
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True, scope="module")
def container_log(request: SubRequest): def container_log(request: SubRequest):
error_log = request.getfixturevalue("error_log") error_log = request.getfixturevalue("error_log")
assert error_log is not None assert error_log is not None
@ -269,7 +269,17 @@ class ResponseComparator(JSONSnapshotExtension):
def eq_chat_complete_chunk( def eq_chat_complete_chunk(
response: ChatCompletionChunk, other: ChatCompletionChunk response: ChatCompletionChunk, other: ChatCompletionChunk
) -> bool: ) -> bool:
return response.choices[0].delta.content == other.choices[0].delta.content if response.choices[0].delta.content is not None:
return (
response.choices[0].delta.content == other.choices[0].delta.content
)
elif response.choices[0].delta.tool_calls is not None:
return (
response.choices[0].delta.tool_calls
== other.choices[0].delta.tool_calls
)
else:
raise RuntimeError(f"Invalid empty chat chunk {response} vs {other}")
def eq_response(response: Response, other: Response) -> bool: def eq_response(response: Response, other: Response) -> bool:
return response.generated_text == other.generated_text and eq_details( return response.generated_text == other.generated_text and eq_details(

View File

@ -26,11 +26,11 @@
"usage": null "usage": null
} }
], ],
"created": 1732293383, "created": 1741195536,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion", "object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": { "usage": {
"completion_tokens": 30, "completion_tokens": 30,
"prompt_tokens": 615, "prompt_tokens": 615,

View File

@ -26,11 +26,11 @@
"usage": null "usage": null
} }
], ],
"created": 1732293384, "created": 1741195538,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion", "object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": { "usage": {
"completion_tokens": 30, "completion_tokens": 30,
"prompt_tokens": 615, "prompt_tokens": 615,

View File

@ -1,7 +1,7 @@
{ {
"choices": [ "choices": [
{ {
"finish_reason": "eos_token", "finish_reason": "stop",
"index": 0, "index": 0,
"logprobs": null, "logprobs": null,
"message": { "message": {
@ -13,12 +13,12 @@
"function": { "function": {
"arguments": { "arguments": {
"format": "celsius", "format": "celsius",
"location": "New York, NY" "location": "Brooklyn, New York"
}, },
"description": null, "description": null,
"name": "get_current_weather" "name": "get_current_weather"
}, },
"id": 0, "id": "0",
"type": "function" "type": "function"
} }
] ]
@ -26,14 +26,14 @@
"usage": null "usage": null
} }
], ],
"created": 1712852394, "created": 1741195540,
"id": "", "id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "text_completion", "object": "chat.completion",
"system_fingerprint": "2.0.1-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": { "usage": {
"completion_tokens": 48, "completion_tokens": 30,
"prompt_tokens": 320, "prompt_tokens": 326,
"total_tokens": 368 "total_tokens": 356
} }
} }

View File

@ -13,14 +13,14 @@
"usage": null "usage": null
} }
], ],
"created": 1728497062, "created": 1741195542,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion", "object": "chat.completion",
"system_fingerprint": "2.4.2-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": { "usage": {
"completion_tokens": 23, "completion_tokens": 22,
"prompt_tokens": 604, "prompt_tokens": 608,
"total_tokens": 627 "total_tokens": 630
} }
} }

View File

@ -11,10 +11,10 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1728497531, "created": 1741195542,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
"system_fingerprint": "2.4.2-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": null "usage": null
} }

View File

@ -0,0 +1,992 @@
[
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "{\"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "function",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " {\"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "name",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "get",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_current",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "_weather",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\",",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "location",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "Bro",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "oklyn",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": ",",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " New",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " York",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\",",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "format",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195536,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\":",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195537,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": " \"",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195537,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "c",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195537,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "elsius",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195537,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "\"}}",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741195537,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": "<|eot_id|>",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
]
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741195537,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.1.2-dev0-native",
"usage": null
}
]

View File

@ -11,10 +11,10 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1728497461, "created": 1741195545,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
"system_fingerprint": "2.4.2-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": null "usage": null
} }

View File

@ -3,25 +3,27 @@
{ {
"delta": { "delta": {
"role": "assistant", "role": "assistant",
"tool_calls": { "tool_calls": [
"function": { {
"arguments": "<|eot_id|>", "function": {
"name": null "arguments": "<|eot_id|>",
}, "name": null
"id": "", },
"index": 0, "id": "",
"type": "function" "index": 0,
} "type": "function"
}
]
}, },
"finish_reason": "stop", "finish_reason": "stop",
"index": 0, "index": 0,
"logprobs": null "logprobs": null
} }
], ],
"created": 1732293254, "created": 1741195554,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
"system_fingerprint": "2.4.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": null "usage": null
} }

View File

@ -11,10 +11,10 @@
"logprobs": null "logprobs": null
} }
], ],
"created": 1729262528, "created": 1741195551,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
"system_fingerprint": "2.3.2-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": null "usage": null
} }

View File

@ -4,25 +4,27 @@
"delta": { "delta": {
"content": null, "content": null,
"role": "assistant", "role": "assistant",
"tool_calls": { "tool_calls": [
"function": { {
"arguments": "<|eot_id|>", "function": {
"name": null "arguments": "<|eot_id|>",
}, "name": null
"id": "", },
"index": 0, "id": "",
"type": "function" "index": 0,
} "type": "function"
}
]
}, },
"finish_reason": "stop", "finish_reason": "stop",
"index": 0, "index": 0,
"logprobs": null "logprobs": null
} }
], ],
"created": 1732293246, "created": 1741195548,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
"system_fingerprint": "2.4.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": null "usage": null
} }

View File

@ -4,25 +4,27 @@
"delta": { "delta": {
"content": null, "content": null,
"role": "assistant", "role": "assistant",
"tool_calls": { "tool_calls": [
"function": { {
"arguments": "<|eot_id|>", "function": {
"name": null "arguments": "<|eot_id|>",
}, "name": null
"id": "", },
"index": 0, "id": "",
"type": "function" "index": 0,
} "type": "function"
}
]
}, },
"finish_reason": "stop", "finish_reason": "stop",
"index": 0, "index": 0,
"logprobs": null "logprobs": null
} }
], ],
"created": 1732293235, "created": 1741195541,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
"system_fingerprint": "2.4.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": null "usage": null
} }

View File

@ -13,11 +13,11 @@
"usage": null "usage": null
} }
], ],
"created": 1739932427, "created": 1741195556,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion", "object": "chat.completion",
"system_fingerprint": "3.1.1-dev0-native", "system_fingerprint": "3.1.2-dev0-native",
"usage": { "usage": {
"completion_tokens": 79, "completion_tokens": 79,
"prompt_tokens": 103, "prompt_tokens": 103,

View File

@ -1,6 +1,7 @@
import pytest import pytest
import requests import requests
import json import json
from openai import OpenAI
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -108,6 +109,38 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna
assert response == response_snapshot assert response == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_grammar_tools_openai(
    flash_llama_grammar_tools, response_snapshot
):
    """Stream a tool-enabled chat completion via the `openai` client.

    The client is pointed at the `/v1` route under the fixture's `base_url`,
    and every chunk yielded by the stream is collected and compared, as one
    list, against `response_snapshot`.
    """
    conversation = [
        {
            "role": "system",
            "content": "Youre a helpful assistant! Answer the users question best you can.",
        },
        {
            "role": "user",
            "content": "What is the weather like in Brooklyn, New York?",
        },
    ]

    openai_client = OpenAI(
        api_key="xx",
        base_url=f"{flash_llama_grammar_tools.base_url}/v1",
    )

    # `tools` is expected to be defined at module level in this test file
    # (not visible in this hunk).
    received = list(
        openai_client.chat.completions.create(
            model="tgi",
            messages=conversation,
            tools=tools,
            stream=True,
            max_tokens=100,
            temperature=0.0,
            seed=1,
        )
    )

    assert received == response_snapshot
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.private @pytest.mark.private
async def test_flash_llama_grammar_tools_auto( async def test_flash_llama_grammar_tools_auto(
@ -213,7 +246,9 @@ async def test_flash_llama_grammar_tools_stream(
last_response = None last_response = None
async for response in responses: async for response in responses:
count += 1 count += 1
tool_calls_generated += response.choices[0].delta.tool_calls.function.arguments tool_calls_generated += (
response.choices[0].delta.tool_calls[0].function.arguments
)
last_response = response last_response = response
assert response.choices[0].delta.content is None assert response.choices[0].delta.content is None
@ -360,7 +395,9 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
async for response in responses: async for response in responses:
count += 1 count += 1
assert response.choices[0].delta.content is None assert response.choices[0].delta.content is None
tool_calls_generated += response.choices[0].delta.tool_calls.function.arguments tool_calls_generated += (
response.choices[0].delta.tool_calls[0].function.arguments
)
last_response = response last_response = response
assert count == 29 assert count == 29
@ -458,8 +495,8 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
break break
response = json.loads(line) response = json.loads(line)
tool_calls_generated += response["choices"][0]["delta"]["tool_calls"][ tool_calls_generated += response["choices"][0]["delta"]["tool_calls"][
"function" 0
]["arguments"] ]["function"]["arguments"]
last_response = response last_response = response
assert count == 39 assert count == 39

View File

@ -13,6 +13,7 @@ dependencies = [
"pytest-asyncio>=0.23.1", "pytest-asyncio>=0.23.1",
"docker>=7", "docker>=7",
"numpy>=2.0", "numpy>=2.0",
"openai>=1.65",
] ]
[tool.isort] [tool.isort]

View File

@ -1,5 +1,5 @@
# This file was autogenerated by uv via the following command: # This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml --output-file requirements.txt # uv pip compile pyproject.toml -o requirements.txt
aiohappyeyeballs==2.4.6 aiohappyeyeballs==2.4.6
# via aiohttp # via aiohttp
aiohttp==3.11.12 aiohttp==3.11.12
@ -8,12 +8,21 @@ aiosignal==1.3.2
# via aiohttp # via aiohttp
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
anyio==4.8.0
# via
# httpx
# openai
attrs==25.1.0 attrs==25.1.0
# via aiohttp # via aiohttp
certifi==2025.1.31 certifi==2025.1.31
# via requests # via
# httpcore
# httpx
# requests
charset-normalizer==3.4.1 charset-normalizer==3.4.1
# via requests # via requests
distro==1.9.0
# via openai
docker==7.1.0 docker==7.1.0
# via text-generation-integration-tests (pyproject.toml) # via text-generation-integration-tests (pyproject.toml)
filelock==3.17.0 filelock==3.17.0
@ -24,20 +33,32 @@ frozenlist==1.5.0
# aiosignal # aiosignal
fsspec==2025.2.0 fsspec==2025.2.0
# via huggingface-hub # via huggingface-hub
h11==0.14.0
# via httpcore
httpcore==1.0.7
# via httpx
httpx==0.28.1
# via openai
huggingface-hub==0.29.0 huggingface-hub==0.29.0
# via text-generation # via text-generation
idna==3.10 idna==3.10
# via # via
# anyio
# httpx
# requests # requests
# yarl # yarl
iniconfig==2.0.0 iniconfig==2.0.0
# via pytest # via pytest
jiter==0.8.2
# via openai
multidict==6.1.0 multidict==6.1.0
# via # via
# aiohttp # aiohttp
# yarl # yarl
numpy==2.2.3 numpy==2.2.3
# via text-generation-integration-tests (pyproject.toml) # via text-generation-integration-tests (pyproject.toml)
openai==1.65.3
# via text-generation-integration-tests (pyproject.toml)
packaging==24.2 packaging==24.2
# via # via
# huggingface-hub # huggingface-hub
@ -51,6 +72,7 @@ propcache==0.2.1
pydantic==2.10.6 pydantic==2.10.6
# via # via
# text-generation-integration-tests (pyproject.toml) # text-generation-integration-tests (pyproject.toml)
# openai
# text-generation # text-generation
pydantic-core==2.27.2 pydantic-core==2.27.2
# via pydantic # via pydantic
@ -67,15 +89,23 @@ requests==2.32.3
# via # via
# docker # docker
# huggingface-hub # huggingface-hub
sniffio==1.3.1
# via
# anyio
# openai
syrupy==4.8.1 syrupy==4.8.1
# via text-generation-integration-tests (pyproject.toml) # via text-generation-integration-tests (pyproject.toml)
text-generation==0.7.0 text-generation==0.7.0
# via text-generation-integration-tests (pyproject.toml) # via text-generation-integration-tests (pyproject.toml)
tqdm==4.67.1 tqdm==4.67.1
# via huggingface-hub
typing-extensions==4.12.2
# via # via
# huggingface-hub # huggingface-hub
# openai
typing-extensions==4.12.2
# via
# anyio
# huggingface-hub
# openai
# pydantic # pydantic
# pydantic-core # pydantic-core
urllib3==2.3.0 urllib3==2.3.0

View File

@ -18,8 +18,8 @@ final: prev: {
src = final.fetchFromGitHub { src = final.fetchFromGitHub {
owner = "huggingface"; owner = "huggingface";
repo = "transformers"; repo = "transformers";
rev = "8d73a38606bc342b370afe1f42718b4828d95aaa"; rev = "v4.49.0";
hash = "sha256-MxroG6CWqrcmRS+eFt7Ej87TDOInN15aRPBUcaycKTI="; hash = "sha256-drq7RWoRaRejiQjCUHIYuzaKa9rA4eQZI2do74scp1c=";
}; };
} }
); );

View File

@ -730,7 +730,7 @@ pub(crate) struct ChatCompletionChoice {
pub struct ToolCallDelta { pub struct ToolCallDelta {
#[schema(example = "assistant")] #[schema(example = "assistant")]
role: String, role: String,
tool_calls: DeltaToolCall, tool_calls: Vec<DeltaToolCall>,
} }
#[derive(Clone, Debug, Serialize, ToSchema)] #[derive(Clone, Debug, Serialize, ToSchema)]
@ -774,7 +774,7 @@ impl ChatCompletionChunk {
}), }),
(None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta { (None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta {
role: "assistant".to_string(), role: "assistant".to_string(),
tool_calls: DeltaToolCall { tool_calls: vec![DeltaToolCall {
index: 0, index: 0,
id: String::new(), id: String::new(),
r#type: "function".to_string(), r#type: "function".to_string(),
@ -782,7 +782,7 @@ impl ChatCompletionChunk {
name: None, name: None,
arguments: tool_calls[0].to_string(), arguments: tool_calls[0].to_string(),
}, },
}, }],
}), }),
(None, None) => ChatCompletionDelta::Chat(TextMessage { (None, None) => ChatCompletionDelta::Chat(TextMessage {
role: "assistant".to_string(), role: "assistant".to_string(),