diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json index 797c9578..6d841747 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json @@ -5,20 +5,20 @@ "index": 0, "logprobs": null, "message": { - "content": "I am a helpful assistant!", + "content": "I'm an artificial intelligence model known as a large language model (LLM) or conversational AI", "role": "assistant", "tool_calls": null } } ], - "created": 1741263686, + "created": 1741693957, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion", "system_fingerprint": "3.1.2-dev0-native", "usage": { - "completion_tokens": 23, - "prompt_tokens": 494, - "total_tokens": 517 + "completion_tokens": 12, + "prompt_tokens": 53, + "total_tokens": 65 } } diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json index dc969cee..47f23f4c 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json @@ -1,24 +1,4 @@ [ - { - "choices": [ - { - "delta": { - "content": "", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741364571, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, { "choices": [ { @@ -32,7 +12,7 @@ "logprobs": null } ], - "created": 1741364571, + "created": 1741694017, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -43,7 +23,7 @@ "choices": [ { "delta": { - "content": " am", + "content": "'m", "role": "assistant", "tool_calls": null }, @@ -52,7 +32,127 @@ "logprobs": null } ], - "created": 1741364571, + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " an", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " artificial", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " intelligence", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " model", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " known", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " as", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -72,7 +172,7 @@ "logprobs": null } ], - "created": 1741364571, + "created": 1741694017, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -83,7 +183,7 @@ "choices": [ { "delta": { - "content": " helpful", + "content": " large", "role": "assistant", "tool_calls": null }, @@ -92,7 +192,7 @@ "logprobs": null } ], - "created": 1741364571, + "created": 1741694017, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -103,7 +203,7 @@ "choices": [ { "delta": { - "content": " assistant", + "content": " language", "role": "assistant", "tool_calls": null }, @@ -112,7 +212,7 @@ "logprobs": null } ], - "created": 1741364571, + "created": 1741694017, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -123,7 +223,7 @@ "choices": [ { "delta": { - "content": "!", + "content": " model", "role": "assistant", "tool_calls": null }, @@ -132,7 +232,167 @@ "logprobs": null } ], - "created": 1741364571, + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " (", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "LL", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "M", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": ")", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " or", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " convers", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ational", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, + "id": "", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk", + "system_fingerprint": "3.1.2-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " AI", + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "length", + "index": 0, + "logprobs": null + } + ], + "created": 1741694017, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json index cdac9bc4..30f03920 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json @@ -3,7 +3,7 @@ "choices": [ { "delta": { - "content": "", + "content": "Once", "role": "assistant", "tool_calls": null }, @@ -12,7 +12,7 @@ "logprobs": null } ], - "created": 1741371722, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -23,7 +23,7 @@ "choices": [ { "delta": { - "content": "There", + "content": " upon", "role": "assistant", "tool_calls": null }, @@ -32,27 +32,7 @@ "logprobs": null } ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " was", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -72,7 +52,7 @@ "logprobs": null } ], - "created": 1741371722, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -83,7 +63,7 @@ "choices": [ { "delta": { - "content": " wise", + "content": " time", "role": "assistant", "tool_calls": null }, @@ -92,7 +72,7 @@ "logprobs": null } ], - "created": 1741371722, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -103,7 +83,7 @@ "choices": [ { "delta": { - "content": " old", + "content": ",", "role": "assistant", "tool_calls": null }, @@ -112,147 +92,7 @@ "logprobs": null } ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " oct", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "opus", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " named", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " Oracle", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ".", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " He", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " lived", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -272,7 +112,7 @@ "logprobs": null } ], - "created": 1741371722, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -292,7 +132,7 @@ "logprobs": null } ], - "created": 1741371722, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -303,7 +143,7 @@ "choices": [ { "delta": { - "content": " cozy", + "content": " vibrant", "role": "assistant", "tool_calls": null }, @@ -312,887 +152,7 @@ "logprobs": null } ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " little", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " cave", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371722, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " beneath", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " the", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " waves", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " with", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " his", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " best", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " friend", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ",", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " a", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " curious", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " se", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "ah", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "orse", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " named", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " Fin", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "ley", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ".", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " One", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " day", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ",", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " Fin", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "ley", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " met", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " a", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " playful", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " dolphin", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " named", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " Daisy", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ",", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " and", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371723, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " the", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " three", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " became", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " inse", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "parable", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ".", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " They", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " spent", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " their", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " days", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " exploring", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " the", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1212,7 +172,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1223,7 +183,7 @@ "choices": [ { "delta": { - "content": ",", + "content": " filled", "role": "assistant", "tool_calls": null }, @@ -1232,7 +192,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1243,7 +203,7 @@ "choices": [ { "delta": { - "content": " playing", + "content": " with", "role": "assistant", "tool_calls": null }, @@ -1252,7 +212,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1263,7 +223,7 @@ "choices": [ { "delta": { - "content": " hide", + "content": " coral", "role": "assistant", "tool_calls": null }, @@ -1272,7 +232,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1283,7 +243,7 @@ "choices": [ { "delta": { - "content": "-and", + "content": " reefs", "role": "assistant", "tool_calls": null }, @@ -1292,67 +252,7 @@ "logprobs": null } ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "-se", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": "ek", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ",", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1372,7 +272,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1383,7 +283,7 @@ "choices": [ { "delta": { - "content": " learning", + "content": " schools", "role": "assistant", "tool_calls": null }, @@ -1392,67 +292,7 @@ "logprobs": null } ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " about", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " the", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": " wonders", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1472,7 +312,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1483,7 +323,7 @@ "choices": [ { "delta": { - "content": " the", + "content": " shimmer", "role": "assistant", "tool_calls": null }, @@ -1492,7 +332,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1503,7 +343,7 @@ "choices": [ { "delta": { - "content": " sea", + "content": "ing", "role": "assistant", "tool_calls": null }, @@ -1512,7 +352,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1523,7 +363,7 @@ "choices": [ { "delta": { - "content": " from", + "content": " fish", "role": "assistant", "tool_calls": null }, @@ -1532,7 +372,7 @@ "logprobs": null } ], - "created": 1741371724, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", @@ -1543,36 +383,16 @@ "choices": [ { "delta": { - "content": " Oracle", + "content": ",", "role": "assistant", "tool_calls": null }, - "finish_reason": null, + "finish_reason": "length", "index": 0, "logprobs": null } ], - "created": 1741371724, - "id": "", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "object": "chat.completion.chunk", - "system_fingerprint": "3.1.2-dev0-native", - "usage": null - }, - { - "choices": [ - { - "delta": { - "content": ".", - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 1741371725, + "created": 1741695408, "id": "", "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk", diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py index bb4b308b..612fa6bd 100644 --- a/integration-tests/models/test_tools_llama.py +++ b/integration-tests/models/test_tools_llama.py @@ -279,7 +279,7 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream( ): client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") response = client.chat_completion( - max_tokens=100, + max_tokens=20, seed=24, tools=tools, tool_choice="auto", @@ -299,7 +299,10 @@ async def test_flash_llama_grammar_tools_insufficient_information_nostream( content_generated = response.choices[0].message.content assert response.choices[0].message.tool_calls is None - assert content_generated == "I am a helpful assistant!" + assert ( + content_generated + == "I'm an artificial intelligence model known as a large language model (LLM) or conversational AI" + ) assert response == response_snapshot @@ -310,7 +313,7 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream( ): client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") stream = client.chat_completion( - max_tokens=100, + max_tokens=20, seed=24, tools=tools, tool_choice="auto", @@ -335,7 +338,10 @@ async def test_flash_llama_grammar_tools_insufficient_information_stream( assert chunk.choices[0].delta.tool_calls is None ######## This is exactly the same as the non streaming case - assert content_generated == "I am a helpful assistant!" + assert ( + content_generated + == "I'm an artificial intelligence model known as a large language model (LLM) or conversational AI" + ) assert chunks == response_snapshot @@ -346,7 +352,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto( ): client = InferenceClient(base_url=f"{flash_llama_grammar_tools.base_url}/v1") stream = client.chat_completion( - max_tokens=100, + max_tokens=20, seed=24, tools=tools, tool_choice="auto", @@ -372,7 +378,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_auto( assert ( content_generated - == "There was a wise old octopus named Oracle. He lived in a cozy little cave beneath the waves with his best friend, a curious seahorse named Finley. One day, Finley met a playful dolphin named Daisy, and the three became inseparable. They spent their days exploring the ocean, playing hide-and-seek, and learning about the wonders of the sea from Oracle." + == "Once upon a time, in a vibrant ocean filled with coral reefs and schools of shimmering fish," ) assert chunks == response_snapshot