mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
Making `tool_calls` a vector. (#3075)
* Making `tool_calls` a vector.
* Update doc.
* Fixing the nix overlay with updated version.
* Add openai dependency.
* Updating the old tests.
* Trying to reduce the logs in the case of errors.
* Less spammy logs too.
This commit is contained in:
parent
3208d1cd1d
commit
8e92942a18
@ -311,7 +311,7 @@ impl State {
|
|||||||
+ entry.request.stopping_parameters.max_new_tokens
|
+ entry.request.stopping_parameters.max_new_tokens
|
||||||
+ self.speculate
|
+ self.speculate
|
||||||
- 1;
|
- 1;
|
||||||
tracing::debug!("Allocating {tokens} with {input_ids:?}");
|
// tracing::debug!("Allocating {tokens} with {input_ids:?}");
|
||||||
|
|
||||||
let block_allocation = match block_allocator.allocate(tokens, input_ids).await {
|
let block_allocation = match block_allocator.allocate(tokens, input_ids).await {
|
||||||
None => {
|
None => {
|
||||||
@ -322,7 +322,7 @@ impl State {
|
|||||||
break 'entry_loop;
|
break 'entry_loop;
|
||||||
}
|
}
|
||||||
Some(mut block_allocation) => {
|
Some(mut block_allocation) => {
|
||||||
tracing::debug!("Allocation: {block_allocation:?}");
|
// tracing::debug!("Allocation: {block_allocation:?}");
|
||||||
max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
|
max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
|
||||||
|
|
||||||
if block_allocation.prefix_len == entry.request.input_length {
|
if block_allocation.prefix_len == entry.request.input_length {
|
||||||
|
@ -67,7 +67,7 @@ class ChoiceDeltaToolCall(BaseModel):
|
|||||||
class ChoiceDelta(BaseModel):
|
class ChoiceDelta(BaseModel):
|
||||||
role: str
|
role: str
|
||||||
content: Optional[str] = None
|
content: Optional[str] = None
|
||||||
tool_calls: Optional[ChoiceDeltaToolCall] = None
|
tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
|
||||||
|
|
||||||
|
|
||||||
class Choice(BaseModel):
|
class Choice(BaseModel):
|
||||||
|
@ -2302,7 +2302,10 @@
|
|||||||
"example": "assistant"
|
"example": "assistant"
|
||||||
},
|
},
|
||||||
"tool_calls": {
|
"tool_calls": {
|
||||||
"$ref": "#/components/schemas/DeltaToolCall"
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/DeltaToolCall"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -98,7 +98,7 @@ def pytest_collection_modifyitems(config, items):
|
|||||||
selector(item)
|
selector(item)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True, scope="module")
|
||||||
def container_log(request: SubRequest):
|
def container_log(request: SubRequest):
|
||||||
error_log = request.getfixturevalue("error_log")
|
error_log = request.getfixturevalue("error_log")
|
||||||
assert error_log is not None
|
assert error_log is not None
|
||||||
@ -269,7 +269,17 @@ class ResponseComparator(JSONSnapshotExtension):
|
|||||||
def eq_chat_complete_chunk(
|
def eq_chat_complete_chunk(
|
||||||
response: ChatCompletionChunk, other: ChatCompletionChunk
|
response: ChatCompletionChunk, other: ChatCompletionChunk
|
||||||
) -> bool:
|
) -> bool:
|
||||||
return response.choices[0].delta.content == other.choices[0].delta.content
|
if response.choices[0].delta.content is not None:
|
||||||
|
return (
|
||||||
|
response.choices[0].delta.content == other.choices[0].delta.content
|
||||||
|
)
|
||||||
|
elif response.choices[0].delta.tool_calls is not None:
|
||||||
|
return (
|
||||||
|
response.choices[0].delta.tool_calls
|
||||||
|
== other.choices[0].delta.tool_calls
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise RuntimeError(f"Invalid empty chat chunk {response} vs {other}")
|
||||||
|
|
||||||
def eq_response(response: Response, other: Response) -> bool:
|
def eq_response(response: Response, other: Response) -> bool:
|
||||||
return response.generated_text == other.generated_text and eq_details(
|
return response.generated_text == other.generated_text and eq_details(
|
||||||
|
@ -26,11 +26,11 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1732293383,
|
"created": 1741195536,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "2.4.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 30,
|
"completion_tokens": 30,
|
||||||
"prompt_tokens": 615,
|
"prompt_tokens": 615,
|
||||||
|
@ -26,11 +26,11 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1732293384,
|
"created": 1741195538,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "2.4.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 30,
|
"completion_tokens": 30,
|
||||||
"prompt_tokens": 615,
|
"prompt_tokens": 615,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"finish_reason": "eos_token",
|
"finish_reason": "stop",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null,
|
"logprobs": null,
|
||||||
"message": {
|
"message": {
|
||||||
@ -13,12 +13,12 @@
|
|||||||
"function": {
|
"function": {
|
||||||
"arguments": {
|
"arguments": {
|
||||||
"format": "celsius",
|
"format": "celsius",
|
||||||
"location": "New York, NY"
|
"location": "Brooklyn, New York"
|
||||||
},
|
},
|
||||||
"description": null,
|
"description": null,
|
||||||
"name": "get_current_weather"
|
"name": "get_current_weather"
|
||||||
},
|
},
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"type": "function"
|
"type": "function"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -26,14 +26,14 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1712852394,
|
"created": 1741195540,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "text_completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "2.0.1-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 48,
|
"completion_tokens": 30,
|
||||||
"prompt_tokens": 320,
|
"prompt_tokens": 326,
|
||||||
"total_tokens": 368
|
"total_tokens": 356
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,14 +13,14 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1728497062,
|
"created": 1741195542,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 23,
|
"completion_tokens": 22,
|
||||||
"prompt_tokens": 604,
|
"prompt_tokens": 608,
|
||||||
"total_tokens": 627
|
"total_tokens": 630
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -11,10 +11,10 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1728497531,
|
"created": 1741195542,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,992 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "{\"",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "function",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "\":",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " {\"",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "_",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "name",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "\":",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " \"",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "get",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "_current",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "_weather",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "\",",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " \"",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "location",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "\":",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " \"",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "Bro",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "oklyn",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": ",",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " New",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " York",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "\",",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " \"",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "format",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195536,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "\":",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195537,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": " \"",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195537,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "c",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195537,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "elsius",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195537,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "\"}}",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195537,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": null,
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "<|eot_id|>",
|
||||||
|
"name": null
|
||||||
|
},
|
||||||
|
"id": "",
|
||||||
|
"index": 0,
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1741195537,
|
||||||
|
"id": "",
|
||||||
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
]
|
@ -11,10 +11,10 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1728497461,
|
"created": 1741195545,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
|
@ -3,25 +3,27 @@
|
|||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"tool_calls": {
|
"tool_calls": [
|
||||||
"function": {
|
{
|
||||||
"arguments": "<|eot_id|>",
|
"function": {
|
||||||
"name": null
|
"arguments": "<|eot_id|>",
|
||||||
},
|
"name": null
|
||||||
"id": "",
|
},
|
||||||
"index": 0,
|
"id": "",
|
||||||
"type": "function"
|
"index": 0,
|
||||||
}
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"finish_reason": "stop",
|
"finish_reason": "stop",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1732293254,
|
"created": 1741195554,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"system_fingerprint": "2.4.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
|
@ -11,10 +11,10 @@
|
|||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1729262528,
|
"created": 1741195551,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"system_fingerprint": "2.3.2-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
|
@ -4,25 +4,27 @@
|
|||||||
"delta": {
|
"delta": {
|
||||||
"content": null,
|
"content": null,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"tool_calls": {
|
"tool_calls": [
|
||||||
"function": {
|
{
|
||||||
"arguments": "<|eot_id|>",
|
"function": {
|
||||||
"name": null
|
"arguments": "<|eot_id|>",
|
||||||
},
|
"name": null
|
||||||
"id": "",
|
},
|
||||||
"index": 0,
|
"id": "",
|
||||||
"type": "function"
|
"index": 0,
|
||||||
}
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"finish_reason": "stop",
|
"finish_reason": "stop",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1732293246,
|
"created": 1741195548,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"system_fingerprint": "2.4.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
|
@ -4,25 +4,27 @@
|
|||||||
"delta": {
|
"delta": {
|
||||||
"content": null,
|
"content": null,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"tool_calls": {
|
"tool_calls": [
|
||||||
"function": {
|
{
|
||||||
"arguments": "<|eot_id|>",
|
"function": {
|
||||||
"name": null
|
"arguments": "<|eot_id|>",
|
||||||
},
|
"name": null
|
||||||
"id": "",
|
},
|
||||||
"index": 0,
|
"id": "",
|
||||||
"type": "function"
|
"index": 0,
|
||||||
}
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"finish_reason": "stop",
|
"finish_reason": "stop",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1732293235,
|
"created": 1741195541,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"system_fingerprint": "2.4.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
|
@ -13,11 +13,11 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1739932427,
|
"created": 1741195556,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "3.1.1-dev0-native",
|
"system_fingerprint": "3.1.2-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 79,
|
"completion_tokens": 79,
|
||||||
"prompt_tokens": 103,
|
"prompt_tokens": 103,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@ -108,6 +109,38 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna
|
|||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.private
|
||||||
|
async def test_flash_llama_grammar_tools_openai(
|
||||||
|
flash_llama_grammar_tools, response_snapshot
|
||||||
|
):
|
||||||
|
client = OpenAI(api_key="xx", base_url=f"{flash_llama_grammar_tools.base_url}/v1")
|
||||||
|
stream = client.chat.completions.create(
|
||||||
|
model="tgi",
|
||||||
|
max_tokens=100,
|
||||||
|
seed=1,
|
||||||
|
tools=tools,
|
||||||
|
stream=True,
|
||||||
|
temperature=0.0,
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "Youre a helpful assistant! Answer the users question best you can.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What is the weather like in Brooklyn, New York?",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
for chunk in stream:
|
||||||
|
chunks.append(chunk)
|
||||||
|
|
||||||
|
assert chunks == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_grammar_tools_auto(
|
async def test_flash_llama_grammar_tools_auto(
|
||||||
@ -213,7 +246,9 @@ async def test_flash_llama_grammar_tools_stream(
|
|||||||
last_response = None
|
last_response = None
|
||||||
async for response in responses:
|
async for response in responses:
|
||||||
count += 1
|
count += 1
|
||||||
tool_calls_generated += response.choices[0].delta.tool_calls.function.arguments
|
tool_calls_generated += (
|
||||||
|
response.choices[0].delta.tool_calls[0].function.arguments
|
||||||
|
)
|
||||||
last_response = response
|
last_response = response
|
||||||
assert response.choices[0].delta.content is None
|
assert response.choices[0].delta.content is None
|
||||||
|
|
||||||
@ -360,7 +395,9 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
|
|||||||
async for response in responses:
|
async for response in responses:
|
||||||
count += 1
|
count += 1
|
||||||
assert response.choices[0].delta.content is None
|
assert response.choices[0].delta.content is None
|
||||||
tool_calls_generated += response.choices[0].delta.tool_calls.function.arguments
|
tool_calls_generated += (
|
||||||
|
response.choices[0].delta.tool_calls[0].function.arguments
|
||||||
|
)
|
||||||
last_response = response
|
last_response = response
|
||||||
|
|
||||||
assert count == 29
|
assert count == 29
|
||||||
@ -458,8 +495,8 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
|
|||||||
break
|
break
|
||||||
response = json.loads(line)
|
response = json.loads(line)
|
||||||
tool_calls_generated += response["choices"][0]["delta"]["tool_calls"][
|
tool_calls_generated += response["choices"][0]["delta"]["tool_calls"][
|
||||||
"function"
|
0
|
||||||
]["arguments"]
|
]["function"]["arguments"]
|
||||||
last_response = response
|
last_response = response
|
||||||
|
|
||||||
assert count == 39
|
assert count == 39
|
||||||
|
@ -13,6 +13,7 @@ dependencies = [
|
|||||||
"pytest-asyncio>=0.23.1",
|
"pytest-asyncio>=0.23.1",
|
||||||
"docker>=7",
|
"docker>=7",
|
||||||
"numpy>=2.0",
|
"numpy>=2.0",
|
||||||
|
"openai>=1.65",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.isort]
|
[tool.isort]
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# This file was autogenerated by uv via the following command:
|
# This file was autogenerated by uv via the following command:
|
||||||
# uv pip compile pyproject.toml --output-file requirements.txt
|
# uv pip compile pyproject.toml -o requirements.txt
|
||||||
aiohappyeyeballs==2.4.6
|
aiohappyeyeballs==2.4.6
|
||||||
# via aiohttp
|
# via aiohttp
|
||||||
aiohttp==3.11.12
|
aiohttp==3.11.12
|
||||||
@ -8,12 +8,21 @@ aiosignal==1.3.2
|
|||||||
# via aiohttp
|
# via aiohttp
|
||||||
annotated-types==0.7.0
|
annotated-types==0.7.0
|
||||||
# via pydantic
|
# via pydantic
|
||||||
|
anyio==4.8.0
|
||||||
|
# via
|
||||||
|
# httpx
|
||||||
|
# openai
|
||||||
attrs==25.1.0
|
attrs==25.1.0
|
||||||
# via aiohttp
|
# via aiohttp
|
||||||
certifi==2025.1.31
|
certifi==2025.1.31
|
||||||
# via requests
|
# via
|
||||||
|
# httpcore
|
||||||
|
# httpx
|
||||||
|
# requests
|
||||||
charset-normalizer==3.4.1
|
charset-normalizer==3.4.1
|
||||||
# via requests
|
# via requests
|
||||||
|
distro==1.9.0
|
||||||
|
# via openai
|
||||||
docker==7.1.0
|
docker==7.1.0
|
||||||
# via text-generation-integration-tests (pyproject.toml)
|
# via text-generation-integration-tests (pyproject.toml)
|
||||||
filelock==3.17.0
|
filelock==3.17.0
|
||||||
@ -24,20 +33,32 @@ frozenlist==1.5.0
|
|||||||
# aiosignal
|
# aiosignal
|
||||||
fsspec==2025.2.0
|
fsspec==2025.2.0
|
||||||
# via huggingface-hub
|
# via huggingface-hub
|
||||||
|
h11==0.14.0
|
||||||
|
# via httpcore
|
||||||
|
httpcore==1.0.7
|
||||||
|
# via httpx
|
||||||
|
httpx==0.28.1
|
||||||
|
# via openai
|
||||||
huggingface-hub==0.29.0
|
huggingface-hub==0.29.0
|
||||||
# via text-generation
|
# via text-generation
|
||||||
idna==3.10
|
idna==3.10
|
||||||
# via
|
# via
|
||||||
|
# anyio
|
||||||
|
# httpx
|
||||||
# requests
|
# requests
|
||||||
# yarl
|
# yarl
|
||||||
iniconfig==2.0.0
|
iniconfig==2.0.0
|
||||||
# via pytest
|
# via pytest
|
||||||
|
jiter==0.8.2
|
||||||
|
# via openai
|
||||||
multidict==6.1.0
|
multidict==6.1.0
|
||||||
# via
|
# via
|
||||||
# aiohttp
|
# aiohttp
|
||||||
# yarl
|
# yarl
|
||||||
numpy==2.2.3
|
numpy==2.2.3
|
||||||
# via text-generation-integration-tests (pyproject.toml)
|
# via text-generation-integration-tests (pyproject.toml)
|
||||||
|
openai==1.65.3
|
||||||
|
# via text-generation-integration-tests (pyproject.toml)
|
||||||
packaging==24.2
|
packaging==24.2
|
||||||
# via
|
# via
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
@ -51,6 +72,7 @@ propcache==0.2.1
|
|||||||
pydantic==2.10.6
|
pydantic==2.10.6
|
||||||
# via
|
# via
|
||||||
# text-generation-integration-tests (pyproject.toml)
|
# text-generation-integration-tests (pyproject.toml)
|
||||||
|
# openai
|
||||||
# text-generation
|
# text-generation
|
||||||
pydantic-core==2.27.2
|
pydantic-core==2.27.2
|
||||||
# via pydantic
|
# via pydantic
|
||||||
@ -67,15 +89,23 @@ requests==2.32.3
|
|||||||
# via
|
# via
|
||||||
# docker
|
# docker
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
|
sniffio==1.3.1
|
||||||
|
# via
|
||||||
|
# anyio
|
||||||
|
# openai
|
||||||
syrupy==4.8.1
|
syrupy==4.8.1
|
||||||
# via text-generation-integration-tests (pyproject.toml)
|
# via text-generation-integration-tests (pyproject.toml)
|
||||||
text-generation==0.7.0
|
text-generation==0.7.0
|
||||||
# via text-generation-integration-tests (pyproject.toml)
|
# via text-generation-integration-tests (pyproject.toml)
|
||||||
tqdm==4.67.1
|
tqdm==4.67.1
|
||||||
# via huggingface-hub
|
|
||||||
typing-extensions==4.12.2
|
|
||||||
# via
|
# via
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
|
# openai
|
||||||
|
typing-extensions==4.12.2
|
||||||
|
# via
|
||||||
|
# anyio
|
||||||
|
# huggingface-hub
|
||||||
|
# openai
|
||||||
# pydantic
|
# pydantic
|
||||||
# pydantic-core
|
# pydantic-core
|
||||||
urllib3==2.3.0
|
urllib3==2.3.0
|
||||||
|
@ -18,8 +18,8 @@ final: prev: {
|
|||||||
src = final.fetchFromGitHub {
|
src = final.fetchFromGitHub {
|
||||||
owner = "huggingface";
|
owner = "huggingface";
|
||||||
repo = "transformers";
|
repo = "transformers";
|
||||||
rev = "8d73a38606bc342b370afe1f42718b4828d95aaa";
|
rev = "v4.49.0";
|
||||||
hash = "sha256-MxroG6CWqrcmRS+eFt7Ej87TDOInN15aRPBUcaycKTI=";
|
hash = "sha256-drq7RWoRaRejiQjCUHIYuzaKa9rA4eQZI2do74scp1c=";
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
@ -730,7 +730,7 @@ pub(crate) struct ChatCompletionChoice {
|
|||||||
pub struct ToolCallDelta {
|
pub struct ToolCallDelta {
|
||||||
#[schema(example = "assistant")]
|
#[schema(example = "assistant")]
|
||||||
role: String,
|
role: String,
|
||||||
tool_calls: DeltaToolCall,
|
tool_calls: Vec<DeltaToolCall>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, ToSchema)]
|
#[derive(Clone, Debug, Serialize, ToSchema)]
|
||||||
@ -774,7 +774,7 @@ impl ChatCompletionChunk {
|
|||||||
}),
|
}),
|
||||||
(None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta {
|
(None, Some(tool_calls)) => ChatCompletionDelta::Tool(ToolCallDelta {
|
||||||
role: "assistant".to_string(),
|
role: "assistant".to_string(),
|
||||||
tool_calls: DeltaToolCall {
|
tool_calls: vec![DeltaToolCall {
|
||||||
index: 0,
|
index: 0,
|
||||||
id: String::new(),
|
id: String::new(),
|
||||||
r#type: "function".to_string(),
|
r#type: "function".to_string(),
|
||||||
@ -782,7 +782,7 @@ impl ChatCompletionChunk {
|
|||||||
name: None,
|
name: None,
|
||||||
arguments: tool_calls[0].to_string(),
|
arguments: tool_calls[0].to_string(),
|
||||||
},
|
},
|
||||||
},
|
}],
|
||||||
}),
|
}),
|
||||||
(None, None) => ChatCompletionDelta::Chat(TextMessage {
|
(None, None) => ChatCompletionDelta::Chat(TextMessage {
|
||||||
role: "assistant".to_string(),
|
role: "assistant".to_string(),
|
||||||
|
Loading…
Reference in New Issue
Block a user