diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 01250ce2..6490f833 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -1,6 +1,13 @@
 pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.export_models"]
 # ruff: noqa: E402
 from _pytest.fixtures import SubRequest
+from huggingface_hub.inference._generated.types.chat_completion import (
+    ChatCompletionStreamOutput,
+    ChatCompletionOutput,
+)
+from openai.types.chat.chat_completion_chunk import (
+    ChatCompletionChunk as OAIChatCompletionChunk,
+)
 import requests
@@ -115,6 +122,31 @@ class ResponseComparator(JSONSnapshotExtension):
     rtol = 0.2
     ignore_logprob = False
 
+    def _serialize(
+        self,
+        data,
+    ):
+        if (
+            isinstance(data, Response)
+            or isinstance(data, ChatComplete)
+            or isinstance(data, ChatCompletionChunk)
+            or isinstance(data, ChatCompletionComplete)
+            or isinstance(data, Completion)
+            or isinstance(data, OAIChatCompletionChunk)
+        ):
+            data = data.model_dump()
+        elif isinstance(data, ChatCompletionStreamOutput) or isinstance(
+            data, ChatCompletionOutput
+        ):
+            data = dict(data)
+        elif isinstance(data, List):
+            data = [self._serialize(d) for d in data]
+        elif isinstance(data, dict):
+            return data
+        else:
+            raise RuntimeError(f"Unexpected data {type(data)} : {data}")
+        return data
+
     def serialize(
         self,
         data,
@@ -123,17 +155,7 @@ class ResponseComparator(JSONSnapshotExtension):
         exclude=None,
         matcher=None,
     ):
-        if (
-            isinstance(data, Response)
-            or isinstance(data, ChatComplete)
-            or isinstance(data, ChatCompletionChunk)
-            or isinstance(data, ChatCompletionComplete)
-        ):
-            data = data.model_dump()
-
-        if isinstance(data, List):
-            data = [d.model_dump() for d in data]
-
+        data = self._serialize(data)
         data = self._filter(
             data=data,
             depth=0,
@@ -142,7 +164,8 @@ class ResponseComparator(JSONSnapshotExtension):
             include=include,
             matcher=matcher,
         )
-        return json.dumps(data, indent=2, ensure_ascii=False, sort_keys=False) + "\n"
+        data = json.dumps(data, indent=2, ensure_ascii=False, sort_keys=False) + "\n"
+        return data
 
     def matches(
         self,
@@ -158,7 +181,7 @@ class ResponseComparator(JSONSnapshotExtension):
             if isinstance(data, Dict):
                 if "choices" in data:
                     data["choices"] = list(
-                        sorted(data["choices"], key=lambda x: x["index"])
+                        sorted(data["choices"], key=lambda x: int(x["index"]))
                     )
                     choices = data["choices"]
                     if isinstance(choices, List) and len(choices) >= 1:
@@ -171,7 +194,7 @@ class ResponseComparator(JSONSnapshotExtension):
                 return Response(**data)
             if isinstance(data, List):
                 return [_convert_data(d) for d in data]
-            raise NotImplementedError
+            raise NotImplementedError(f"Data: {data}")
 
         def eq_token(token: Token, other: Token) -> bool:
             return (
@@ -269,17 +292,25 @@ class ResponseComparator(JSONSnapshotExtension):
         def eq_chat_complete_chunk(
            response: ChatCompletionChunk, other: ChatCompletionChunk
        ) -> bool:
-            if response.choices[0].delta.content is not None:
-                return (
-                    response.choices[0].delta.content == other.choices[0].delta.content
-                )
-            elif response.choices[0].delta.tool_calls is not None:
-                return (
-                    response.choices[0].delta.tool_calls
-                    == other.choices[0].delta.tool_calls
-                )
+            if response.choices:
+                if response.choices[0].delta.content is not None:
+                    return (
+                        response.choices[0].delta.content
+                        == other.choices[0].delta.content
+                    )
+                elif response.choices[0].delta.tool_calls is not None:
+                    return (
+                        response.choices[0].delta.tool_calls
+                        == other.choices[0].delta.tool_calls
+                    )
+                else:
+                    raise RuntimeError(
+                        f"Invalid empty chat chunk {response} 
vs {other}" + ) + elif response.usage is not None: + return response.usage == other.usage else: - raise RuntimeError(f"Invalid empty chat chunk {response} vs {other}") + raise RuntimeError(f"Invalid empty chat {response} vs {other}") def eq_response(response: Response, other: Response) -> bool: return response.generated_text == other.generated_text and eq_details( @@ -294,6 +325,9 @@ class ResponseComparator(JSONSnapshotExtension): if not isinstance(snapshot_data, List): snapshot_data = [snapshot_data] + if len(serialized_data) == 0: + return len(snapshot_data) == len(serialized_data) + if isinstance(serialized_data[0], Completion): return len(snapshot_data) == len(serialized_data) and all( [eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)] diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_stream_usage.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_stream_usage.json index 8c7be4cb..fbb3669f 100644 --- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_stream_usage.json +++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_stream_usage.json @@ -12,11 +12,11 @@ "logprobs": null } ], - "created": 1726656043, + "created": 1741338471, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -32,11 +32,11 @@ "logprobs": null } ], - "created": 1726656043, + "created": 1741338471, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -52,11 +52,11 @@ "logprobs": null } ], - "created": 1726656043, + "created": 1741338471, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -72,11 +72,11 @@ "logprobs": null } ], - "created": 1726656043, + "created": 1741338471, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -92,11 +92,11 @@ "logprobs": null } ], - "created": 1726656043, + "created": 1741338472, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -112,11 +112,11 @@ "logprobs": null } ], - "created": 1726656043, + "created": 1741338472, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -132,11 +132,11 @@ "logprobs": null } ], - "created": 1726656044, + "created": 1741338472, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": 
"chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -152,11 +152,11 @@ "logprobs": null } ], - "created": 1726656044, + "created": 1741338472, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -172,11 +172,11 @@ "logprobs": null } ], - "created": 1726656044, + "created": 1741338472, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": null }, { @@ -192,11 +192,11 @@ "logprobs": null } ], - "created": 1726656044, + "created": 1741338472, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", - "system_fingerprint": "2.2.1-dev0-native", + "system_fingerprint": "3.1.2-dev0-native", "usage": { "completion_tokens": 10, "prompt_tokens": 40, diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json similarity index 66% rename from integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json rename to integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json index 7445099f..06cf038a 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json @@ -6,15 +6,11 @@ "logprobs": null, "message": { "content": null, - "name": null, "role": "assistant", "tool_calls": [ { "function": { - "arguments": { - "format": "celsius", - "location": "Brooklyn, New York" - }, + "arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", "description": null, "name": "get_current_weather" }, @@ -22,18 +18,17 @@ "type": "function" } ] - }, - "usage": null + } } ], - "created": 1741195536, + "created": 1741263682, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", "system_fingerprint": "3.1.2-dev0-native", "usage": { - "completion_tokens": 30, - "prompt_tokens": 615, - "total_tokens": 645 + "completion_tokens": 29, + "prompt_tokens": 501, + "total_tokens": 530 } } diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json similarity index 66% rename from integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json rename to integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json index 99018f96..0152ea70 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json @@ -6,15 +6,11 @@ "logprobs": null, "message": { "content": null, - "name": null, "role": "assistant", "tool_calls": [ { "function": { - "arguments": { - "format": 
"celsius", - "location": "Brooklyn, New York" - }, + "arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", "description": null, "name": "get_current_weather" }, @@ -22,18 +18,17 @@ "type": "function" } ] - }, - "usage": null + } } ], - "created": 1741195538, + "created": 1741263684, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", "system_fingerprint": "3.1.2-dev0-native", "usage": { - "completion_tokens": 30, - "prompt_tokens": 615, - "total_tokens": 645 + "completion_tokens": 29, + "prompt_tokens": 286, + "total_tokens": 315 } } diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json new file mode 100644 index 00000000..8dab9a5b --- /dev/null +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json @@ -0,0 +1,842 @@ +[ + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "{\"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "function", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " {\"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "name", + "name": null + }, + "id": "", + 
"index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "get", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_current", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_weather", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + 
"system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "location", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "Paris", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": ",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " France", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + 
"index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "format", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "c", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "elsius", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\"}}", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "<|eot_id|>", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1741263685, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + 
"system_fingerprint": "3.1.2-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json similarity index 62% rename from integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json rename to integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json index 9cfea791..797c9578 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json @@ -5,22 +5,20 @@ "index": 0, "logprobs": null, "message": { - "content": "I am an AI assistant", - "name": null, + "content": "I am a helpful assistant!", "role": "assistant", "tool_calls": null - }, - "usage": null + } } ], - "created": 1741195542, + "created": 1741263686, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", "system_fingerprint": "3.1.2-dev0-native", "usage": { - "completion_tokens": 22, - "prompt_tokens": 608, - "total_tokens": 630 + "completion_tokens": 23, + "prompt_tokens": 494, + "total_tokens": 517 } } diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json index 34615f8e..b1d4fb87 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json @@ -1,20 +1,102 @@ -{ - "choices": [ - { - "delta": { - "content": " assistant", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741195542, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null -} +[ + { + "choices": [ + { + "delta": { + "content": "I", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263687, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " am", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263687, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263687, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " helpful", + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263687, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " assistant", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263687, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json similarity index 66% rename from integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json rename to integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json index a80a6a23..3b22d83e 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json @@ -6,15 +6,11 @@ "logprobs": null, "message": { "content": null, - "name": null, "role": "assistant", "tool_calls": [ { "function": { - "arguments": { - "format": "celsius", - "location": "Brooklyn, New York" - }, + "arguments": "{\"format\":\"fahrenheit\",\"location\":\"Brooklyn, NY\"}", "description": null, "name": "get_current_weather" }, @@ -22,18 +18,17 @@ "type": "function" } ] - }, - "usage": null + } } ], - "created": 1741195540, + "created": 1741263680, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", "system_fingerprint": "3.1.2-dev0-native", "usage": { - "completion_tokens": 30, - "prompt_tokens": 326, - "total_tokens": 356 + "completion_tokens": 29, + "prompt_tokens": 501, + "total_tokens": 530 } } diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json index e6d78924..c8fc50a2 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json @@ -24,7 +24,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -57,7 +57,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -90,7 +90,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -123,7 +123,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -156,7 +156,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -189,7 +189,7 @@ "logprobs": null } ], - "created": 1741195536, + 
"created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -222,7 +222,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -255,7 +255,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -288,7 +288,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -321,7 +321,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -354,7 +354,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -387,7 +387,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -420,7 +420,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -453,7 +453,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -486,7 +486,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -519,7 +519,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -552,7 +552,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -585,7 +585,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -618,7 +618,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -637,7 +637,7 @@ "tool_calls": [ { "function": { - "arguments": " New", + "arguments": " NY", "name": null }, "id": "", @@ -651,40 +651,7 @@ "logprobs": null } ], - "created": 1741195536, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": null, - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": " York", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -717,7 +684,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -750,7 +717,7 @@ "logprobs": null } ], - 
"created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -783,7 +750,7 @@ "logprobs": null } ], - "created": 1741195536, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -816,7 +783,7 @@ "logprobs": null } ], - "created": 1741195537, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -849,7 +816,7 @@ "logprobs": null } ], - "created": 1741195537, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -868,7 +835,7 @@ "tool_calls": [ { "function": { - "arguments": "c", + "arguments": "f", "name": null }, "id": "", @@ -882,7 +849,7 @@ "logprobs": null } ], - "created": 1741195537, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -901,7 +868,7 @@ "tool_calls": [ { "function": { - "arguments": "elsius", + "arguments": "ahrenheit", "name": null }, "id": "", @@ -915,7 +882,7 @@ "logprobs": null } ], - "created": 1741195537, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -948,7 +915,7 @@ "logprobs": null } ], - "created": 1741195537, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -981,7 +948,7 @@ "logprobs": null } ], - "created": 1741195537, + "created": 1741263681, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream.json deleted file mode 100644 index 11644190..00000000 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "choices": [ - { - "delta": { - "content": " fans", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741195545, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null -} diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json new file mode 100644 index 00000000..4b0f5a07 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json @@ -0,0 +1,1542 @@ +[ + { + "choices": [ + { + "delta": { + "content": "There", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " was", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": 
"chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " wise", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " old", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " oct", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "opus", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " named", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Oracle", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ".", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " He", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " lived", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": 
"meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " in", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " cozy", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " little", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " cave", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " beneath", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " waves", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " with", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " his", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + 
} + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " best", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " friend", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263688, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " curious", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " se", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ah", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "orse", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " named", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Fin", + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ley", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ".", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " One", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " day", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Fin", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ley", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " met", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " playful", + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " dolphin", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " named", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Daisy", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " three", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " became", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " inse", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + 
"choices": [ + { + "delta": { + "content": "parable", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ".", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " They", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263689, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " spent", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " their", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " days", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " exploring", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " ocean", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + 
"system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " playing", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " hide", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "-and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "-se", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ek", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " learning", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " about", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": 
"meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " wonders", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " of", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " sea", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " from", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Oracle", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263690, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json index 713e7a56..b253d465 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json @@ -1,29 +1,1232 @@ -{ - "choices": [ - { - "delta": { - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "<|eot_id|>", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": "stop", - "index": 0, - "logprobs": null - } - ], - "created": 1741195554, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null -} +[ + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "{\"", + "name": null + }, + "id": "", + "index": 0, + 
"type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "function", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " {\"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "n", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "am", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "e", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": 
"3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "get", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_n", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_day", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_weather", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_fore", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "cast", + "name": null + }, + "id": "", + "index": 0, + "type": 
"function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "location", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263698, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "San", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " Francisco", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": 
"3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": ",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " CA", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "format", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "c", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } 
+ ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "elsius", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\",", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " \"", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "num", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "_days", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "\":", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": " ", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + 
"usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "3", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "}}", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "<|eot_id|>", + "name": null + }, + "id": "", + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1741263699, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json index bde28149..9a4a6b0c 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json @@ -1,20 +1,2002 @@ -{ - "choices": [ - { - "delta": { - "content": " deep", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": "length", - "index": 0, - "logprobs": null - } - ], - "created": 1741195551, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null -} +[ + { + "choices": [ + { + "delta": { + "content": "Once", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263693, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " upon", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263693, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263693, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " time", + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263693, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " in", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " vibrant", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " ocean", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " filled", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " with", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " coral", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " reefs", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " 
and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " schools", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " of", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " shimmer", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ing", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " fish", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " lived", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " three", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " dear", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + 
}, + { + "choices": [ + { + "delta": { + "content": " friends", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ":", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Luna", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " sea", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " turtle", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Fin", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ley", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + 
"system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " friendly", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " fish", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263694, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Cr", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "usty", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " wise", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " crab", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ".\n\n", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": 
"meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "L", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "una", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " was", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " oldest", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " of", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " three", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ".", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " She", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " had", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " traveled", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " world", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " exploring", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " hidden", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " caves", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " ship", + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "w", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "re", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263695, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "cks", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " collecting", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " sparkling", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " shells", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " 
shiny", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " pe", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "bb", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "les", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ".", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " Her", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " shell", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " was", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " a", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " beautiful", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { 
+ "choices": [ + { + "delta": { + "content": " mosaic", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " of", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " blues", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " greens", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ",", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " and", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " her", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " gentle", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " eyes", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + 
"system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " twink", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "led", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " with", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263696, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " secrets", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263697, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " of", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263697, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " the", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741263697, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " deep", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "length", + "index": 0, + "logprobs": null + } + ], + "created": 1741263697, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json index 7896607a..fe51488c 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json @@ -1,30 +1 @@ -{ - "choices": [ - { - "delta": { - 
"content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "<|eot_id|>", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": "stop", - "index": 0, - "logprobs": null - } - ], - "created": 1741195548, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null -} +[] diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json deleted file mode 100644 index 92d27f61..00000000 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "choices": [ - { - "delta": { - "content": null, - "role": "assistant", - "tool_calls": [ - { - "function": { - "arguments": "<|eot_id|>", - "name": null - }, - "id": "", - "index": 0, - "type": "function" - } - ] - }, - "finish_reason": "stop", - "index": 0, - "logprobs": null - } - ], - "created": 1741195541, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null -} diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json index 33a3bb43..45161f35 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json @@ -5,22 +5,20 @@ "index": 0, "logprobs": null, "message": { - "content": "I can't access real-time data, but I can provide you with current conditions and forecast for Paris, France:\n\nThe current conditions in Paris are mostly cloudy with a temperature of 6.7°C (44.1°F). \n\nPlease note that the actual weather may differ from this information, and I recommend checking the forecast on a reliable weather website for the most up-to-date information.", - "name": null, + "content": "I can't access real-time data, but I can provide you with current conditions and forecast for Paris, France:\n\nThe current conditions in Paris are mostly cloudy with a temperature of 6.7°C (44.1°F). \n\nPlease note that the actual weather may differ from the provided information. 
For up-to-date information, I suggest checking a reliable weather website or app for the latest conditions and forecast.", "role": "assistant", "tool_calls": null - }, - "usage": null + } } ], - "created": 1741195556, + "created": 1741263702, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", "system_fingerprint": "3.1.2-dev0-native", "usage": { - "completion_tokens": 79, - "prompt_tokens": 103, - "total_tokens": 182 + "completion_tokens": 83, + "prompt_tokens": 109, + "total_tokens": 192 } } diff --git a/integration-tests/models/test_completion_prompts.py b/integration-tests/models/test_completion_prompts.py index 6c359f1e..27988ef9 100644 --- a/integration-tests/models/test_completion_prompts.py +++ b/integration-tests/models/test_completion_prompts.py @@ -2,8 +2,9 @@ import pytest import requests import json from aiohttp import ClientSession +from huggingface_hub import InferenceClient -from text_generation.types import Completion, ChatCompletionChunk +from text_generation.types import Completion @pytest.fixture(scope="module") @@ -52,52 +53,35 @@ def test_flash_llama_completion_single_prompt( async def test_flash_llama_completion_stream_usage( flash_llama_completion, response_snapshot ): - url = f"{flash_llama_completion.base_url}/v1/chat/completions" - request = { - "model": "tgi", - "messages": [ + client = InferenceClient(base_url=f"{flash_llama_completion.base_url}/v1") + stream = client.chat_completion( + model="tgi", + messages=[ { "role": "user", "content": "What is Deep Learning?", } ], - "max_tokens": 10, - "temperature": 0.0, - "stream_options": {"include_usage": True}, - "stream": True, - } + max_tokens=10, + temperature=0.0, + stream_options={"include_usage": True}, + stream=True, + ) string = "" chunks = [] had_usage = False - async with ClientSession(headers=flash_llama_completion.headers) as session: - async with session.post(url, json=request) as response: - # iterate over the stream - async for chunk in response.content.iter_any(): - # remove "data:" - chunk = chunk.decode().split("\n\n") - # remove "data:" if present - chunk = [c.replace("data:", "") for c in chunk] - # remove empty strings - chunk = [c for c in chunk if c] - # remove completion marking chunk - chunk = [c for c in chunk if c != " [DONE]"] - # parse json - chunk = [json.loads(c) for c in chunk] + for chunk in stream: + # remove "data:" + chunks.append(chunk) + print(f"Chunk {chunk}") + if len(chunk.choices) == 1: + index = chunk.choices[0].index + assert index == 0 + string += chunk.choices[0].delta.content + if chunk.usage: + assert not had_usage + had_usage = True - for c in chunk: - chunks.append(ChatCompletionChunk(**c)) - assert "choices" in c - if len(c["choices"]) == 1: - index = c["choices"][0]["index"] - assert index == 0 - string += c["choices"][0]["delta"]["content"] - - has_usage = c["usage"] is not None - assert not had_usage - if has_usage: - had_usage = True - else: - raise RuntimeError("Expected different payload") assert had_usage assert ( string @@ -105,51 +89,29 @@ async def test_flash_llama_completion_stream_usage( ) assert chunks == response_snapshot - request = { - "model": "tgi", - "messages": [ + stream = client.chat_completion( + model="tgi", + messages=[ { "role": "user", "content": "What is Deep Learning?", } ], - "max_tokens": 10, - "temperature": 0.0, - "stream": True, - } + max_tokens=10, + temperature=0.0, + # No usage + # stream_options={"include_usage": True}, + stream=True, + ) string = "" chunks = [] had_usage = False - async with 
ClientSession(headers=flash_llama_completion.headers) as session: - async with session.post(url, json=request) as response: - # iterate over the stream - async for chunk in response.content.iter_any(): - # remove "data:" - chunk = chunk.decode().split("\n\n") - # remove "data:" if present - chunk = [c.replace("data:", "") for c in chunk] - # remove empty strings - chunk = [c for c in chunk if c] - # remove completion marking chunk - chunk = [c for c in chunk if c != " [DONE]"] - # parse json - chunk = [json.loads(c) for c in chunk] - - for c in chunk: - chunks.append(ChatCompletionChunk(**c)) - assert "choices" in c - if len(c["choices"]) == 1: - index = c["choices"][0]["index"] - assert index == 0 - string += c["choices"][0]["delta"]["content"] - - has_usage = c["usage"] is not None - assert not had_usage - if has_usage: - had_usage = True - else: - raise RuntimeError("Expected different payload") - assert not had_usage + for chunk in stream: + chunks.append(chunk) + assert chunk.usage is None + assert len(chunk.choices) == 1 + assert chunk.choices[0].index == 0 + string += chunk.choices[0].delta.content assert ( string == "**Deep Learning: An Overview**\n=====================================\n\n" diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py index 7fd6cadd..ebf69cb7 100644 --- a/integration-tests/models/test_tools_llama.py +++ b/integration-tests/models/test_tools_llama.py @@ -1,7 +1,10 @@ import pytest -import requests -import json from openai import OpenAI +from huggingface_hub import InferenceClient +from huggingface_hub.inference._generated.types.chat_completion import ( + ChatCompletionOutputToolCall, + ChatCompletionOutputFunctionDefinition, +) @pytest.fixture(scope="module") @@ -77,8 +80,11 @@ tools = [ @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_snapshot): - response = await flash_llama_grammar_tools.chat( +async def test_flash_llama_grammar_tools_nostream( + flash_llama_grammar_tools, response_snapshot +): + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + response = client.chat_completion( max_tokens=100, seed=1, tools=tools, @@ -96,15 +102,15 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna ) assert response.choices[0].message.content is None assert response.choices[0].message.tool_calls == [ - { - "id": "0", - "type": "function", - "function": { - "description": None, - "name": "get_current_weather", - "arguments": {"format": "celsius", "location": "Brooklyn, New York"}, - }, - } + ChatCompletionOutputToolCall( + id="0", + type="function", + function=ChatCompletionOutputFunctionDefinition( + description=None, + name="get_current_weather", + arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', + ), + ) ] assert response == response_snapshot @@ -135,18 +141,25 @@ async def test_flash_llama_grammar_tools_openai( ) chunks = [] + tool = "" for chunk in stream: + tool += chunk.choices[0].delta.tool_calls[0].function.arguments chunks.append(chunk) + assert ( + tool + == '{"function": {"_name": "get_current_weather", "location": "Brooklyn, NY", "format": "fahrenheit"}}<|eot_id|>' + ) assert chunks == response_snapshot @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_grammar_tools_auto( +async def test_flash_llama_grammar_tools_auto_nostream( flash_llama_grammar_tools, response_snapshot ): - response = await flash_llama_grammar_tools.chat( + client = 
InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + response = client.chat_completion( max_tokens=100, seed=1, tools=tools, @@ -165,15 +178,15 @@ async def test_flash_llama_grammar_tools_auto( ) assert response.choices[0].message.content is None assert response.choices[0].message.tool_calls == [ - { - "id": "0", - "type": "function", - "function": { - "description": None, - "name": "get_current_weather", - "arguments": {"format": "celsius", "location": "Brooklyn, New York"}, - }, - } + ChatCompletionOutputToolCall( + id="0", + type="function", + function=ChatCompletionOutputFunctionDefinition( + description=None, + name="get_current_weather", + arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', + ), + ) ] assert response == response_snapshot @@ -181,10 +194,11 @@ async def test_flash_llama_grammar_tools_auto( @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_grammar_tools_choice( +async def test_flash_llama_grammar_tools_choice_nostream( flash_llama_grammar_tools, response_snapshot ): - response = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + response = client.chat_completion( max_tokens=100, seed=1, tools=tools, @@ -203,15 +217,15 @@ async def test_flash_llama_grammar_tools_choice( ) assert response.choices[0].message.content is None assert response.choices[0].message.tool_calls == [ - { - "id": "0", - "type": "function", - "function": { - "description": None, - "name": "get_current_weather", - "arguments": {"format": "celsius", "location": "Brooklyn, New York"}, - }, - } + ChatCompletionOutputToolCall( + id="0", + type="function", + function=ChatCompletionOutputFunctionDefinition( + description=None, + name="get_current_weather", + arguments='{"format":"fahrenheit","location":"Brooklyn, NY"}', + ), + ) ] assert response == response_snapshot @@ -219,10 +233,11 @@ async def test_flash_llama_grammar_tools_choice( @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_grammar_tools_stream( +async def test_flash_llama_grammar_tools_choice_stream( flash_llama_grammar_tools, response_snapshot ): - responses = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + stream = client.chat_completion( max_tokens=100, seed=1, tools=tools, @@ -241,31 +256,27 @@ async def test_flash_llama_grammar_tools_stream( stream=True, ) - count = 0 tool_calls_generated = "" - last_response = None - async for response in responses: - count += 1 - tool_calls_generated += ( - response.choices[0].delta.tool_calls[0].function.arguments - ) - last_response = response - assert response.choices[0].delta.content is None + chunks = [] + for chunk in stream: + tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments + assert chunk.choices[0].delta.content is None + chunks.append(chunk) assert ( tool_calls_generated == '{"function": {"_name": "get_current_weather", "location": "Paris, France", "format": "celsius"}}<|eot_id|>' ) - assert count == 28 - assert last_response == response_snapshot + assert chunks == response_snapshot @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_grammar_tools_insufficient_information( +async def test_flash_llama_grammar_tools_insufficient_information_nostream( flash_llama_grammar_tools, response_snapshot ): - responses = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + response = 
client.chat_completion( max_tokens=100, seed=24, tools=tools, @@ -283,10 +294,13 @@ async def test_flash_llama_grammar_tools_insufficient_information( stream=False, ) - assert responses.choices[0].message.tool_calls is None - assert responses.choices[0].message.content == "I am an AI assistant" + content_generated = response.choices[0].message.content + assert response.choices[0].message.tool_calls is None - assert responses == response_snapshot + ######## FIXME before MERGE ############################ + # TODO This is different from the streaming case, this is NOT normal. + assert content_generated == "I am a helpful assistant!" + assert response == response_snapshot @pytest.mark.asyncio @@ -294,7 +308,8 @@ async def test_flash_llama_grammar_tools_insufficient_information( async def test_flash_llama_grammar_tools_insufficient_information_stream( flash_llama_grammar_tools, response_snapshot ): - responses = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + stream = client.chat_completion( max_tokens=100, seed=24, tools=tools, @@ -312,26 +327,24 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream( stream=True, ) - count = 0 content_generated = "" - last_response = None - async for response in responses: - count += 1 - content_generated += response.choices[0].delta.content - last_response = response - assert response.choices[0].delta.tool_calls is None + chunks = [] + for chunk in stream: + content_generated += chunk.choices[0].delta.content + chunks.append(chunk) + assert chunk.choices[0].delta.tool_calls is None - assert count == 5 - assert content_generated == "I am an AI assistant" - assert last_response == response_snapshot + assert content_generated == "I am a helpful assistant" + assert chunks == response_snapshot @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_grammar_tools_sea_creatures_stream( +async def test_flash_llama_grammar_tools_sea_creatures_stream_auto( flash_llama_grammar_tools, response_snapshot ): - responses = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + stream = client.chat_completion( max_tokens=100, seed=24, tools=tools, @@ -349,21 +362,18 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream( stream=True, ) - count = 0 content_generated = "" - last_response = None - async for response in responses: - count += 1 - content_generated += response.choices[0].delta.content - last_response = response - assert response.choices[0].delta.tool_calls is None + chunks = [] + for chunk in stream: + content_generated += chunk.choices[0].delta.content + chunks.append(chunk) + assert chunk.choices[0].delta.tool_calls is None - assert count == 62 assert ( content_generated - == "Once upon a time, in the ocean, there lived three sea creatures. There was a wise old octopus named Bob, a mischievous seagull named Sam, and a gentle sea turtle named Luna. They all lived together in a beautiful coral reef, surrounded by colorful fish and swaying sea fans" + == "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. 
They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle" ) - assert last_response == response_snapshot + assert chunks == response_snapshot @pytest.mark.asyncio @@ -371,7 +381,8 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream( async def test_flash_llama_grammar_tools_sea_creatures_stream_required( flash_llama_grammar_tools, response_snapshot ): - responses = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + stream = client.chat_completion( max_tokens=100, seed=24, tools=tools, @@ -389,23 +400,17 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required( stream=True, ) - count = 0 tool_calls_generated = "" - last_response = None - async for response in responses: - count += 1 - assert response.choices[0].delta.content is None - tool_calls_generated += ( - response.choices[0].delta.tool_calls[0].function.arguments - ) - last_response = response + chunks = [] + for chunk in stream: + assert chunk.choices[0].delta.content is None + tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments - assert count == 29 assert ( tool_calls_generated - == '{"function": {"_name": "get_current_weather", "location": "San Francisco, CA", "format": "celsius"}}<|eot_id|>' + == '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "fahrenheit", "num_days":3}}<|eot_id|>' ) - assert last_response == response_snapshot + assert chunks == response_snapshot @pytest.mark.asyncio @@ -413,7 +418,8 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required( async def test_flash_llama_grammar_tools_sea_creatures_stream_none( flash_llama_grammar_tools, response_snapshot ): - responses = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + stream = client.chat_completion( max_tokens=100, seed=24, tools=tools, @@ -431,22 +437,18 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_none( stream=True, ) - count = 0 content_generated = "" - last_response = None - async for response in responses: - count += 1 - content_generated += response.choices[0].delta.content - last_response = response - assert response.choices[0].delta.tool_calls is None + chunks = [] + for chunk in stream: + chunks.append(chunk) + content_generated += chunk.choices[0].delta.content + assert chunk.choices[0].delta.tool_calls is None - assert count == 100 - print(content_generated) assert ( content_generated == "Once upon a time, in a vibrant ocean filled with coral reefs and schools of shimmering fish, lived three dear friends: Luna the sea turtle, Finley the friendly fish, and Crusty the wise crab.\n\nLuna was the oldest of the three. She had traveled the world, exploring hidden caves and shipwrecks, and collecting sparkling shells and shiny pebbles. 
Her shell was a beautiful mosaic of blues and greens, and her gentle eyes twinkled with the secrets of the deep" ) - assert last_response == response_snapshot + assert chunks == response_snapshot @pytest.mark.asyncio @@ -454,57 +456,37 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_none( async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object( flash_llama_grammar_tools, response_snapshot ): - # using `requests` to send the request until the client library supports tool_choice as a function object - responses = requests.post( - f"{flash_llama_grammar_tools.base_url}/v1/chat/completions", - headers=flash_llama_grammar_tools.headers, - json={ - "model": "tgi", - "messages": [ - { - "role": "system", - "content": "You're a helpful assistant! Answer the users question best you can. If the question is not answerable by the tools, just generate a response.", - }, - { - "role": "user", - "content": "Tell me a story about 3 sea creatures", - }, - ], - "tools": tools, - "tool_choice": { - "type": "function", - "function": {"name": "get_n_day_weather_forecast"}, + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + stream = client.chat_completion( + messages=[ + { + "role": "system", + "content": "You're a helpful assistant! Answer the users question best you can. If the question is not answerable by the tools, just generate a response.", }, - "seed": 24, - "max_tokens": 100, - "stream": True, + { + "role": "user", + "content": "Tell me a story about 3 sea creatures", + }, + ], + tools=tools, + tool_choice={ + "type": "function", + "function": {"name": "get_n_day_weather_forecast"}, }, + max_tokens=100, + seed=24, stream=True, ) - # iterate over the response in chunks - count = 0 + chunks = [] tool_calls_generated = "" - last_response = None - for chunk in responses.iter_content(chunk_size=1024): - if chunk: - count += 1 - # remove the "data: " prefix, trailing newline, and split the chunk into individual lines - lines = chunk.decode("utf-8").replace("data: ", "").rstrip("\n").split("\n") - for line in lines: - if line == "[DONE]": - break - response = json.loads(line) - tool_calls_generated += response["choices"][0]["delta"]["tool_calls"][ - 0 - ]["function"]["arguments"] - last_response = response - - assert count == 39 + for chunk in stream: + tool_calls_generated += chunk.choices[0].delta.tool_calls[0].function.arguments + chunks.append(chunk) assert ( tool_calls_generated - == '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days":3}}<|eot_id|>' + == '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days": 3}}<|eot_id|>' ) - assert last_response == response_snapshot + assert chunks == response_snapshot @pytest.mark.asyncio @@ -512,7 +494,8 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object( async def test_flash_llama_tool_reply_response( flash_llama_grammar_tools, response_snapshot ): - responses = await flash_llama_grammar_tools.chat( + client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") + response = client.chat_completion( max_tokens=100, seed=42, messages=[ @@ -536,10 +519,10 @@ async def test_flash_llama_tool_reply_response( stream=False, ) - assert responses.choices[0].message.tool_calls is None + assert response.choices[0].message.tool_calls is None assert ( - responses.choices[0].message.content - == "I can't access real-time data, but I can 
provide you with current conditions and forecast for Paris, France:\n\nThe current conditions in Paris are mostly cloudy with a temperature of 6.7°C (44.1°F). \n\nPlease note that the actual weather may differ from this information, and I recommend checking the forecast on a reliable weather website for the most up-to-date information." + response.choices[0].message.content + == "I can't access real-time data, but I can provide you with current conditions and forecast for Paris, France:\n\nThe current conditions in Paris are mostly cloudy with a temperature of 6.7°C (44.1°F). \n\nPlease note that the actual weather may differ from the provided information. For up-to-date information, I suggest checking a reliable weather website or app for the latest conditions and forecast." ) - assert responses == response_snapshot + assert response == response_snapshot diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml index 37003440..07aa4307 100644 --- a/integration-tests/pyproject.toml +++ b/integration-tests/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "docker>=7", "numpy>=2.0", "openai>=1.65", + "huggingface_hub>=0.29", ] [tool.isort] diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt index d419d4b3..a85db4a5 100644 --- a/integration-tests/requirements.txt +++ b/integration-tests/requirements.txt @@ -40,7 +40,9 @@ httpcore==1.0.7 httpx==0.28.1 # via openai huggingface-hub==0.29.0 - # via text-generation + # via + # text-generation-integration-tests (pyproject.toml) + # text-generation idna==3.10 # via # anyio diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs index e660cc74..b179dd4d 100644 --- a/router/src/infer/chat_template.rs +++ b/router/src/infer/chat_template.rs @@ -1189,7 +1189,7 @@ TOOL CALL ID: 0 let tool_prompt = "This default prompt will be used".to_string(); let tools_and_prompt = Some((tools, tool_prompt)); let result = ct.apply(msgs, tools_and_prompt); - let expected = "[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today? [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":{\"type\":\"object\",\"properties\":{\"location\":{\"type\":\"string\",\"description\":\"The city and state, e.g. San Francisco, CA\"},\"format\":{\"type\":\"string\",\"enum\":[\"celsius\",\"fahrenheit\"],\"description\":\"The temperature unit to use. Infer this from the users location.\"}},\"required\":[\"location\",\"format\"]}}}]\nThis default prompt will be used [/INST]".to_string(); + let expected = "[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today? [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":\"{\\\"type\\\":\\\"object\\\",\\\"properties\\\":{\\\"location\\\":{\\\"type\\\":\\\"string\\\",\\\"description\\\":\\\"The city and state, e.g. San Francisco, CA\\\"},\\\"format\\\":{\\\"type\\\":\\\"string\\\",\\\"enum\\\":[\\\"celsius\\\",\\\"fahrenheit\\\"],\\\"description\\\":\\\"The temperature unit to use. 
Infer this from the users location.\\\"}},\\\"required\\\":[\\\"location\\\",\\\"format\\\"]}\"}}]\nThis default prompt will be used [/INST]".to_string(); assert_eq!(result.unwrap(), expected); } @@ -1227,7 +1227,7 @@ TOOL CALL ID: 0 let tool_prompt = "This default prompt will be used".to_string(); let tools_and_prompt = Some((tools, tool_prompt)); let result = ct.apply(msgs, tools_and_prompt); - let expected = "<|start_header_id|>system<|end_header_id|>\n\nEnvironment: ipython\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYoure a helpful assistant! Answer the users question best you can.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGiven the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n\nRespond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.Do not use variables.\n\n{\n \"function\": {\n \"arguments\": {\n \"properties\": {\n \"format\": {\n \"description\": \"The temperature unit to use. Infer this from the users location.\",\n \"enum\": [\n \"celsius\",\n \"fahrenheit\"\n ],\n \"type\": \"string\"\n },\n \"location\": {\n \"description\": \"The city and state, e.g. San Francisco, CA\",\n \"type\": \"string\"\n }\n },\n \"required\": [\n \"location\",\n \"format\"\n ],\n \"type\": \"object\"\n },\n \"description\": \"Get the current weather\",\n \"name\": \"get_current_weather\"\n },\n \"type\": \"function\"\n}\n\nWhat is the weather like in Brooklyn, New York?\n---\nThis default prompt will be used<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n".to_string(); + let expected = "<|start_header_id|>system<|end_header_id|>\n\nEnvironment: ipython\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYoure a helpful assistant! Answer the users question best you can.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGiven the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n\nRespond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.Do not use variables.\n\n{\n \"function\": {\n \"arguments\": \"{\\\"type\\\":\\\"object\\\",\\\"properties\\\":{\\\"location\\\":{\\\"type\\\":\\\"string\\\",\\\"description\\\":\\\"The city and state, e.g. San Francisco, CA\\\"},\\\"format\\\":{\\\"type\\\":\\\"string\\\",\\\"enum\\\":[\\\"celsius\\\",\\\"fahrenheit\\\"],\\\"description\\\":\\\"The temperature unit to use. 
Infer this from the users location.\\\"}},\\\"required\\\":[\\\"location\\\",\\\"format\\\"]}\",\n \"description\": \"Get the current weather\",\n \"name\": \"get_current_weather\"\n },\n \"type\": \"function\"\n}\n\nWhat is the weather like in Brooklyn, New York?\n---\nThis default prompt will be used<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n".to_string(); assert_eq!(result.unwrap(), expected); } } diff --git a/router/src/lib.rs b/router/src/lib.rs index 60f1f73a..a7923c4c 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -1138,10 +1138,17 @@ pub struct FunctionDefinition { #[serde(default)] pub description: Option<String>, pub name: String, - #[serde(alias = "parameters")] + #[serde(alias = "parameters", serialize_with = "serialize_as_string")] pub arguments: serde_json::Value, } +fn serialize_as_string<S>(value: &serde_json::Value, serializer: S) -> Result<S::Ok, S::Error> +where + S: serde::Serializer, +{ + serializer.serialize_str(&value.to_string()) +} + #[derive(Clone, Debug, Deserialize, Serialize, ToSchema)] #[cfg_attr(test, derive(PartialEq))] pub(crate) struct Tool { @@ -1730,7 +1737,7 @@ mod tests { let serialized = serde_json::to_string(&message).unwrap(); assert_eq!( serialized, - r#"{"role":"assistant","tool_calls":[{"id":"0","type":"function","function":{"description":null,"name":"myfn","arguments":{"format":"csv"}}}]}"# + r#"{"role":"assistant","tool_calls":[{"id":"0","type":"function","function":{"description":null,"name":"myfn","arguments":"{\"format\":\"csv\"}"}}]}"# ); }
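
Below is a minimal, standalone sketch of the serialization behavior introduced by the last hunk: `serialize_with` re-encodes a JSON object field as a JSON-encoded string, which is why the test assertions and snapshots above now expect `arguments` as a string rather than a map. This is an illustration, not the router's own types; it assumes `serde` (with the `derive` feature) and `serde_json` as dependencies, and the struct name and values are made up for the example.

use serde::{Serialize, Serializer};
use serde_json::{json, Value};

// Re-encode a JSON value as a JSON string, mirroring the serializer added in router/src/lib.rs.
fn serialize_as_string<S>(value: &Value, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    serializer.serialize_str(&value.to_string())
}

#[derive(Serialize)]
struct Function {
    name: String,
    #[serde(serialize_with = "serialize_as_string")]
    arguments: Value,
}

fn main() {
    let call = Function {
        name: "myfn".to_string(),
        arguments: json!({"format": "csv"}),
    };
    // Prints: {"name":"myfn","arguments":"{\"format\":\"csv\"}"}
    println!("{}", serde_json::to_string(&call).unwrap());
}

Encoding `arguments` as a string matches the OpenAI chat-completions convention, where `tool_calls[].function.arguments` is a JSON-encoded string that clients decode themselves (for example with `json.loads` on the Python side), which is also what the updated `huggingface_hub` and `openai` client assertions in the tests above rely on.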