diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py index 1085075e..6f51c153 100644 --- a/clients/python/text_generation/types.py +++ b/clients/python/text_generation/types.py @@ -67,7 +67,7 @@ class ChoiceDeltaToolCall(BaseModel): class ChoiceDelta(BaseModel): role: str content: Optional[str] = None - tool_calls: Optional[ChoiceDeltaToolCall] = None + tool_calls: Optional[List[ChoiceDeltaToolCall]] = None class Choice(BaseModel): diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 0ffcd162..4477f647 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -269,7 +269,17 @@ class ResponseComparator(JSONSnapshotExtension): def eq_chat_complete_chunk( response: ChatCompletionChunk, other: ChatCompletionChunk ) -> bool: - return response.choices[0].delta.content == other.choices[0].delta.content + if response.choices[0].delta.content: + return ( + response.choices[0].delta.content == other.choices[0].delta.content + ) + elif response.choices[0].delta.tool_calls: + return ( + response.choices[0].delta.tool_calls + == other.choices[0].delta.tool_calls + ) + else: + raise RuntimeError("Invalid empty chat chunk") def eq_response(response: Response, other: Response) -> bool: return response.generated_text == other.generated_text and eq_details( diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json new file mode 100644 index 00000000..b91610a3 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json @@ -0,0 +1,992 @@ +[ + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "{\"", + "name": null + }, + "id": "", + "index": 0, + "type": 
"function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "function", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " {\"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + 
"function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "name", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "get", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_current", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_weather", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + 
] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "location", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183959, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + 
"refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "Bro", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "oklyn", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": ",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " New", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " York", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + 
"index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "format", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": [ + { + "function": { + "arguments": "c", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "elsius", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\"}}", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "<|eot_id|>", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1741183960, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "service_tier": null, + 
"system_fingerprint": "3.1.2-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py index b8a90cff..1325669f 100644 --- a/integration-tests/models/test_tools_llama.py +++ b/integration-tests/models/test_tools_llama.py @@ -1,6 +1,7 @@ import pytest import requests import json +from openai import OpenAI @pytest.fixture(scope="module") @@ -108,6 +109,38 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna assert response == response_snapshot +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_llama_grammar_tools_openai( + flash_llama_grammar_tools, response_snapshot +): + client = OpenAI(api_key="xx", base_url=f"{flash_llama_grammar_tools.base_url}/v1") + stream = client.chat.completions.create( + model="tgi", + max_tokens=100, + seed=1, + tools=tools, + stream=True, + temperature=0.0, + messages=[ + { + "role": "system", + "content": "Youre a helpful assistant! 
Answer the users question best you can.", + }, + { + "role": "user", + "content": "What is the weather like in Brooklyn, New York?", + }, + ], + ) + + chunks = [] + for chunk in stream: + chunks.append(chunk) + + assert chunks == response_snapshot + + @pytest.mark.asyncio @pytest.mark.private async def test_flash_llama_grammar_tools_auto( diff --git a/router/src/lib.rs b/router/src/lib.rs index 637e6c56..60f1f73a 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -730,7 +730,7 @@ pub(crate) struct ChatCompletionChoice { pub struct ToolCallDelta { #[schema(example = "assistant")] role: String, - tool_calls: DeltaToolCall, + tool_calls: Vec<DeltaToolCall>, } #[derive(Clone, Debug, Serialize, ToSchema)] @@ -774,7 +774,7 @@ impl ChatCompletionChunk { }), (None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta { role: "assistant".to_string(), - tool_calls: DeltaToolCall { + tool_calls: vec![DeltaToolCall { index: 0, id: String::new(), r#type: "function".to_string(), @@ -782,7 +782,7 @@ impl ChatCompletionChunk { name: None, arguments: tool_calls[0].to_string(), }, - }, + }], }), (None, None) => ChatCompletionDelta::Chat(TextMessage { role: "assistant".to_string(),