mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 13:52:07 +00:00
Merge 65c6008847
into 8f8819795f
This commit is contained in:
commit
366cfa8f05
@ -1771,6 +1771,24 @@
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"required": [
|
||||
"type",
|
||||
"value"
|
||||
],
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"json_schema"
|
||||
]
|
||||
},
|
||||
"value": {
|
||||
"$ref": "#/components/schemas/JsonSchemaConfig"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
@ -1864,6 +1882,22 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"JsonSchemaConfig": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"schema"
|
||||
],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Optional name identifier for the schema",
|
||||
"nullable": true
|
||||
},
|
||||
"schema": {
|
||||
"description": "The actual JSON schema definition"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Message": {
|
||||
"allOf": [
|
||||
{
|
||||
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1740095072,
|
||||
"id": "",
|
||||
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.1.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 29,
|
||||
"prompt_tokens": 135,
|
||||
"total_tokens": 164
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1740095073,
|
||||
"id": "",
|
||||
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.1.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 29,
|
||||
"prompt_tokens": 135,
|
||||
"total_tokens": 164
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "{\"firstName\":\"David\",\"lastName\":\"(Not provided)\",\"hobby\":\": Trees and nature\",\"numCats\":2}",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1741975610,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 37,
|
||||
"prompt_tokens": 32,
|
||||
"total_tokens": 69
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "{\"name\":\"John Smith\",\"age\":30,\"address\":{\"street\":\"Maple Street\",\"city\":\"Boston\"},\"hobbies\":[\"botany\",\", \",\"astronomy\",\", \",\"solving mathematical puzzles\"]}",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1741975505,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 50,
|
||||
"prompt_tokens": 37,
|
||||
"total_tokens": 87
|
||||
}
|
||||
}
|
@ -0,0 +1,743 @@
|
||||
[
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "{",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "f",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "irs",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "t",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Name",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\":",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "David",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\",",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "l",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "ast",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Name",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\":",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Unknown",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975615,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\",",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "h",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "obb",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "y",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\":",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\",",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " \\\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "riding",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " bicycles",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\\\",",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " \\\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "having",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " cats",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\\\"\",",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\"",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "num",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Cats",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\":",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "2",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "}",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"role": "assistant"
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1741975616,
|
||||
"id": "",
|
||||
"model": "google/gemma-3-4b-it",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": "3.2.1-dev0-native",
|
||||
"usage": null
|
||||
}
|
||||
]
|
@ -29,26 +29,55 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh
|
||||
unit: str
|
||||
temperature: List[int]
|
||||
|
||||
json_payload = {
|
||||
"model": "tgi",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like the next 3 days in San Francisco, CA?",
|
||||
},
|
||||
],
|
||||
"seed": 42,
|
||||
"max_tokens": 500,
|
||||
"response_format": {"type": "json_object", "value": Weather.schema()},
|
||||
}
|
||||
# send the request
|
||||
response = requests.post(
|
||||
f"{llama_grammar.base_url}/v1/chat/completions",
|
||||
headers=llama_grammar.headers,
|
||||
json={
|
||||
"model": "tgi",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like the next 3 days in San Francisco, CA?",
|
||||
},
|
||||
],
|
||||
"seed": 42,
|
||||
"max_tokens": 500,
|
||||
"response_format": {"type": "json_object", "value": Weather.schema()},
|
||||
},
|
||||
json=json_payload,
|
||||
)
|
||||
|
||||
chat_completion = response.json()
|
||||
called = chat_completion["choices"][0]["message"]["content"]
|
||||
|
||||
assert response.status_code == 200
|
||||
assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
|
||||
assert chat_completion == response_snapshot
|
||||
|
||||
json_payload["response_format"]["type"] = "json"
|
||||
response = requests.post(
|
||||
f"{llama_grammar.base_url}/v1/chat/completions",
|
||||
headers=llama_grammar.headers,
|
||||
json=json_payload,
|
||||
)
|
||||
|
||||
chat_completion = response.json()
|
||||
called = chat_completion["choices"][0]["message"]["content"]
|
||||
|
||||
assert response.status_code == 200
|
||||
assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
|
||||
assert chat_completion == response_snapshot
|
||||
|
||||
json_payload["response_format"]["type"] = "json_schema"
|
||||
response = requests.post(
|
||||
f"{llama_grammar.base_url}/v1/chat/completions",
|
||||
headers=llama_grammar.headers,
|
||||
json=json_payload,
|
||||
)
|
||||
|
||||
chat_completion = response.json()
|
||||
|
209
integration-tests/models/test_json_schema_constrain.py
Normal file
209
integration-tests/models/test_json_schema_constrain.py
Normal file
@ -0,0 +1,209 @@
|
||||
import pytest
|
||||
import json
|
||||
import requests
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def model_handle(launcher):
    """Launch google/gemma-3-4b-it once per module and yield the launcher handle.

    The server is started on two shards with grammar support explicitly left
    enabled (``disable_grammar_support=False``). The handle itself is yielded,
    not a URL; the launcher context is exited once the fixture is torn down.
    """
    with launcher(
        "google/gemma-3-4b-it",
        num_shard=2,
        disable_grammar_support=False,
    ) as handle:
        yield handle
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
async def model_fixture(model_handle):
    """Wait until the launched server reports healthy, then return its client.

    The tests in this module read ``base_url`` from the returned client.
    """
    handle = model_handle
    # 300 is the health-check budget; presumably seconds, confirm against the
    # launcher handle's `health` signature.
    await handle.health(300)
    client = handle.client
    return client
|
||||
|
||||
|
||||
# Flat "person" JSON Schema used by test_json_schema_basic and
# test_json_schema_stream. All four fields are required: the three string
# fields need at least 4 characters and numCats must be an integer >= 0.
# Key order is kept exactly as-is because the dict is serialized into the
# request body.
person_schema = {
    "type": "object",
    "$id": "https://example.com/person.schema.json",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "Person",
    "properties": {
        "firstName": dict(
            type="string", description="The person's first name.", minLength=4
        ),
        "lastName": dict(
            type="string", description="The person's last name.", minLength=4
        ),
        "hobby": dict(
            description="The person's hobby.", type="string", minLength=4
        ),
        "numCats": dict(
            description="The number of cats the person has.",
            type="integer",
            minimum=0,
        ),
    },
    "required": ["firstName", "lastName", "hobby", "numCats"],
}
|
||||
|
||||
# Nested schema used by test_json_schema_complex: an object containing an
# optional nested "address" object and a non-empty array of string hobbies.
# Note that "address" is NOT listed in the top-level "required" array.
complex_schema = {
    "type": "object",
    "properties": {
        "name": {
            "type": "string",
        },
        "age": {
            "type": "integer",
            "minimum": 0,
        },
        "address": {
            "type": "object",
            "properties": {
                "street": {
                    "type": "string",
                },
                "city": {
                    "type": "string",
                },
                "postalCode": {
                    "type": "string",
                },
            },
            # postalCode is optional inside an address.
            "required": [
                "street",
                "city",
            ],
        },
        "hobbies": {
            "type": "array",
            "items": {
                "type": "string",
            },
            "minItems": 1,
        },
    },
    "required": [
        "name",
        "age",
        "hobbies",
    ],
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.private
async def test_json_schema_basic(model_fixture, response_snapshot):
    """Test basic JSON schema validation with the person schema.

    Sends a non-streaming chat completion constrained by ``person_schema``
    through the ``json_schema`` response format, checks that the returned
    message content parses as JSON with every required field present, and
    compares the full response against the stored snapshot.
    """
    payload = {
        "model": "tgi",
        "messages": [
            {
                "role": "user",
                "content": "David is a person who likes trees and nature. He enjoys studying math and science. He has 2 cats.",
            },
        ],
        "seed": 42,
        "temperature": 0.0,
        "response_format": {
            "type": "json_schema",
            "value": {"name": "person", "strict": True, "schema": person_schema},
        },
    }
    response = requests.post(
        f"{model_fixture.base_url}/v1/chat/completions",
        json=payload,
    )

    # Surface the server's error body on failure instead of an opaque KeyError
    # when indexing into "choices" below.
    assert response.status_code == 200, response.text

    result = response.json()

    # Validate response format
    content = result["choices"][0]["message"]["content"]
    parsed_content = json.loads(content)

    assert "firstName" in parsed_content
    assert "lastName" in parsed_content
    assert "hobby" in parsed_content
    assert "numCats" in parsed_content
    assert isinstance(parsed_content["numCats"], int)
    assert parsed_content["numCats"] >= 0
    assert result == response_snapshot
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.private
async def test_json_schema_complex(model_fixture, response_snapshot):
    """Test complex JSON schema with nested objects and arrays.

    Sends a non-streaming chat completion constrained by ``complex_schema``,
    checks the nested structure of the parsed message content, and compares
    the full response against the stored snapshot.
    """
    payload = {
        "model": "tgi",
        "messages": [
            {
                "role": "user",
                "content": "John Smith is 30 years old. He lives on Maple Street in Boston. He enjoys botany, astronomy, and solving mathematical puzzles.",
            },
        ],
        "seed": 42,
        "temperature": 0.0,
        "response_format": {
            "type": "json_schema",
            "value": {
                "name": "complex_person",
                "strict": True,
                "schema": complex_schema,
            },
        },
    }
    response = requests.post(
        f"{model_fixture.base_url}/v1/chat/completions",
        json=payload,
    )

    # Surface the server's error body on failure instead of an opaque KeyError
    # when indexing into "choices" below.
    assert response.status_code == 200, response.text

    result = response.json()

    # Validate response format
    content = result["choices"][0]["message"]["content"]
    parsed_content = json.loads(content)

    assert "name" in parsed_content
    assert "age" in parsed_content
    assert "hobbies" in parsed_content
    # "address" is not in complex_schema's top-level "required" list; it is
    # expected here because the prompt states where John lives.
    assert "address" in parsed_content
    assert "street" in parsed_content["address"]
    assert "city" in parsed_content["address"]
    assert isinstance(parsed_content["hobbies"], list)
    assert len(parsed_content["hobbies"]) >= 1
    assert result == response_snapshot
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.private
async def test_json_schema_stream(model_fixture, response_snapshot):
    """Test JSON schema validation with streaming.

    Streams a chat completion constrained by ``person_schema``, collects every
    server-sent event payload, concatenates the ``delta.content`` pieces, and
    checks that the assembled text parses as JSON with all required fields.
    The list of collected chunks is compared against the stored snapshot.
    """
    payload = {
        "model": "tgi",
        "messages": [
            {
                "role": "user",
                "content": "David is a person who likes to ride bicycles. He has 2 cats.",
            },
        ],
        "seed": 42,
        "temperature": 0.0,
        "response_format": {
            "type": "json_schema",
            "value": {"name": "person", "strict": True, "schema": person_schema},
        },
        "stream": True,
    }

    sse_prefix = "data:"
    chunks = []
    content_generated = ""

    # The context manager releases the connection even if an assertion or a
    # parse error interrupts the loop before the stream is fully consumed.
    with requests.post(
        f"{model_fixture.base_url}/v1/chat/completions",
        json=payload,
        stream=True,
    ) as response:
        # Surface the server's error body instead of failing later while
        # parsing an error payload as a stream of chunks.
        assert response.status_code == 200, response.text

        for raw_line in response.iter_lines():
            if not raw_line:
                continue

            line = raw_line.decode("utf-8")
            # Only "data:" fields carry a JSON payload; skip anything else
            # rather than feeding it to json.loads.
            if not line.startswith(sse_prefix):
                continue

            data = line[len(sse_prefix):].strip()
            # "[DONE]" is the end-of-stream sentinel, not a JSON chunk.
            if data == "[DONE]":
                continue

            chunk = json.loads(data)
            chunks.append(chunk)

            choices = chunk.get("choices") or []
            if choices:
                delta = choices[0].get("delta") or {}
                # A missing or null content contributes nothing to the text.
                content_generated += delta.get("content") or ""

    # Validate the final assembled JSON
    parsed_content = json.loads(content_generated)
    assert "firstName" in parsed_content
    assert "lastName" in parsed_content
    assert "hobby" in parsed_content
    assert "numCats" in parsed_content
    assert isinstance(parsed_content["numCats"], int)
    assert parsed_content["numCats"] >= 0
    assert chunks == response_snapshot
|
@ -222,6 +222,17 @@ impl HubProcessorConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)]
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
struct JsonSchemaConfig {
|
||||
/// Optional name identifier for the schema
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
name: Option<String>,
|
||||
|
||||
/// The actual JSON schema definition
|
||||
schema: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)]
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[serde(tag = "type", content = "value")]
|
||||
@ -234,8 +245,16 @@ pub(crate) enum GrammarType {
|
||||
#[serde(alias = "json_object")]
|
||||
#[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
|
||||
Json(serde_json::Value),
|
||||
|
||||
#[serde(rename = "regex")]
|
||||
Regex(String),
|
||||
|
||||
/// A JSON Schema specification with additional metadata.
|
||||
///
|
||||
/// Includes an optional name for the schema, an optional strict flag, and the required schema definition.
|
||||
#[serde(rename = "json_schema")]
|
||||
#[schema(example = json ! ({"schema": {"properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}, "name": "person_info", "strict": true}))]
|
||||
JsonSchema(JsonSchemaConfig),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, ToSchema)]
|
||||
|
@ -14,7 +14,6 @@ use crate::sagemaker::{
|
||||
};
|
||||
use crate::validation::ValidationError;
|
||||
use crate::vertex::vertex_compatibility;
|
||||
use crate::ChatTokenizeResponse;
|
||||
use crate::{
|
||||
usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
|
||||
GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
|
||||
@ -29,6 +28,7 @@ use crate::{
|
||||
ChatRequest, Chunk, CompatGenerateRequest, Completion, CompletionComplete, CompletionFinal,
|
||||
CompletionRequest, CompletionType, DeltaToolCall, Function, Prompt, Tool,
|
||||
};
|
||||
use crate::{ChatTokenizeResponse, JsonSchemaConfig};
|
||||
use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice};
|
||||
use crate::{MessageBody, ModelInfo, ModelsInfo};
|
||||
use async_stream::__private::AsyncStream;
|
||||
@ -1362,6 +1362,7 @@ CompatGenerateRequest,
|
||||
SagemakerRequest,
|
||||
GenerateRequest,
|
||||
GrammarType,
|
||||
JsonSchemaConfig,
|
||||
ChatRequest,
|
||||
Message,
|
||||
MessageContent,
|
||||
|
@ -380,6 +380,28 @@ impl Validation {
|
||||
|
||||
ValidGrammar::Regex(grammar_regex.to_string())
|
||||
}
|
||||
GrammarType::JsonSchema(schema_config) => {
|
||||
// Extract the actual schema for validation
|
||||
let json = &schema_config.schema;
|
||||
|
||||
// Check if the json is a valid JSONSchema
|
||||
jsonschema::draft202012::meta::validate(json)
|
||||
.map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?;
|
||||
|
||||
// The schema can be valid but lack properties.
|
||||
// We need properties for the grammar to be successfully parsed in Python.
|
||||
// Therefore, we must check and throw an error if properties are missing.
|
||||
json.get("properties")
|
||||
.ok_or(ValidationError::InvalidGrammar(
|
||||
"Grammar must have a 'properties' field".to_string(),
|
||||
))?;
|
||||
|
||||
// Do compilation in the router for performance
|
||||
let grammar_regex = json_schema_to_regex(json, None, json)
|
||||
.map_err(ValidationError::RegexFromSchema)?;
|
||||
|
||||
ValidGrammar::Regex(grammar_regex.to_string())
|
||||
}
|
||||
GrammarType::Regex(regex) => ValidGrammar::Regex(regex),
|
||||
};
|
||||
Some(valid_grammar)
|
||||
|
Loading…
Reference in New Issue
Block a user