diff --git a/docs/openapi.json b/docs/openapi.json index ad5124798..84ba58858 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -1771,6 +1771,24 @@ "type": "string" } } + }, + { + "type": "object", + "required": [ + "type", + "value" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "json_schema" + ] + }, + "value": { + "$ref": "#/components/schemas/JsonSchemaConfig" + } + } } ], "discriminator": { @@ -1864,6 +1882,22 @@ } } }, + "JsonSchemaConfig": { + "type": "object", + "required": [ + "schema" + ], + "properties": { + "name": { + "type": "string", + "description": "Optional name identifier for the schema", + "nullable": true + }, + "schema": { + "description": "The actual JSON schema definition" + } + } + }, "Message": { "allOf": [ { diff --git a/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json new file mode 100644 index 000000000..c4f804fcf --- /dev/null +++ b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }", + "role": "assistant" + } + } + ], + "created": 1740095072, + "id": "", + "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "object": "chat.completion", + "system_fingerprint": "3.1.1-dev0-native", + "usage": { + "completion_tokens": 29, + "prompt_tokens": 135, + "total_tokens": 164 + } +} diff --git a/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json new file mode 100644 index 
000000000..1be656eeb --- /dev/null +++ b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }", + "role": "assistant" + } + } + ], + "created": 1740095073, + "id": "", + "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "object": "chat.completion", + "system_fingerprint": "3.1.1-dev0-native", + "usage": { + "completion_tokens": 29, + "prompt_tokens": 135, + "total_tokens": 164 + } +} diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json new file mode 100644 index 000000000..18e7242c0 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"firstName\":\"David\",\"lastName\":\"(Not provided)\",\"hobby\":\": Trees and nature\",\"numCats\":2}", + "role": "assistant" + } + } + ], + "created": 1741975610, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 37, + "prompt_tokens": 32, + "total_tokens": 69 + } +} diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json new file mode 100644 index 000000000..9efee3299 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + 
"message": { + "content": "{\"name\":\"John Smith\",\"age\":30,\"address\":{\"street\":\"Maple Street\",\"city\":\"Boston\"},\"hobbies\":[\"botany\",\", \",\"astronomy\",\", \",\"solving mathematical puzzles\"]}", + "role": "assistant" + } + } + ], + "created": 1741975505, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 50, + "prompt_tokens": 37, + "total_tokens": 87 + } +} diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json new file mode 100644 index 000000000..943fad302 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json @@ -0,0 +1,743 @@ +[ + { + "choices": [ + { + "delta": { + "content": "{", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "f", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "irs", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "t", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Name", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "David", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": 
"google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "l", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ast", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Name", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": 
"chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Unknown", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "h", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "obb", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "y", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": 
"3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " \\\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "riding", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " bicycles", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\\\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null 
+ }, + { + "choices": [ + { + "delta": { + "content": " \\\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "having", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " cats", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\\\"\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "num", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": 
{ + "content": "Cats", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "2", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "}", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "", + "role": "assistant" + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/test_grammar_response_format_llama.py b/integration-tests/models/test_grammar_response_format_llama.py index f2a8a96da..d3ae735a7 100644 --- a/integration-tests/models/test_grammar_response_format_llama.py +++ b/integration-tests/models/test_grammar_response_format_llama.py @@ -29,26 +29,55 @@ async def test_grammar_response_format_llama_json(llama_grammar, 
response_snapsh unit: str temperature: List[int] + json_payload = { + "model": "tgi", + "messages": [ + { + "role": "system", + "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}", + }, + { + "role": "user", + "content": "What's the weather like the next 3 days in San Francisco, CA?", + }, + ], + "seed": 42, + "max_tokens": 500, + "response_format": {"type": "json_object", "value": Weather.schema()}, + } # send the request response = requests.post( f"{llama_grammar.base_url}/v1/chat/completions", headers=llama_grammar.headers, - json={ - "model": "tgi", - "messages": [ - { - "role": "system", - "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}", - }, - { - "role": "user", - "content": "What's the weather like the next 3 days in San Francisco, CA?", - }, - ], - "seed": 42, - "max_tokens": 500, - "response_format": {"type": "json_object", "value": Weather.schema()}, - }, + json=json_payload, + ) + + chat_completion = response.json() + called = chat_completion["choices"][0]["message"]["content"] + + assert response.status_code == 200 + assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }' + assert chat_completion == response_snapshot + + json_payload["response_format"]["type"] = "json" + response = requests.post( + f"{llama_grammar.base_url}/v1/chat/completions", + headers=llama_grammar.headers, + json=json_payload, + ) + + chat_completion = response.json() + called = chat_completion["choices"][0]["message"]["content"] + + assert response.status_code == 200 + assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }' + assert chat_completion == response_snapshot + + json_payload["response_format"] = {"type": "json_schema", "value": {"name": "weather", "strict": True, "schema": Weather.schema()}} + response = requests.post( + f"{llama_grammar.base_url}/v1/chat/completions", + headers=llama_grammar.headers, + json=json_payload, ) chat_completion = response.json() diff --git
a/integration-tests/models/test_json_schema_constrain.py b/integration-tests/models/test_json_schema_constrain.py new file mode 100644 index 000000000..65b4a7b8e --- /dev/null +++ b/integration-tests/models/test_json_schema_constrain.py @@ -0,0 +1,209 @@ +import pytest +import json +import requests + + +@pytest.fixture(scope="module") +def model_handle(launcher): + """Fixture to provide the base URL for API calls.""" + with launcher( + "google/gemma-3-4b-it", + num_shard=2, + disable_grammar_support=False, + ) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def model_fixture(model_handle): + await model_handle.health(300) + return model_handle.client + + +# Sample JSON Schema for testing +person_schema = { + "type": "object", + "$id": "https://example.com/person.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Person", + "properties": { + "firstName": { + "type": "string", + "description": "The person's first name.", + "minLength": 4, + }, + "lastName": { + "type": "string", + "description": "The person's last name.", + "minLength": 4, + }, + "hobby": { + "description": "The person's hobby.", + "type": "string", + "minLength": 4, + }, + "numCats": { + "description": "The number of cats the person has.", + "type": "integer", + "minimum": 0, + }, + }, + "required": ["firstName", "lastName", "hobby", "numCats"], +} + +# More complex schema for testing nested objects and arrays +complex_schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + "postalCode": {"type": "string"}, + }, + "required": ["street", "city"], + }, + "hobbies": {"type": "array", "items": {"type": "string"}, "minItems": 1}, + }, + "required": ["name", "age", "hobbies"], +} + + +@pytest.mark.asyncio +@pytest.mark.private +async def 
test_json_schema_basic(model_fixture, response_snapshot): + """Test basic JSON schema validation with the person schema.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "David is a person who likes trees and nature. He enjoys studying math and science. He has 2 cats.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": {"name": "person", "strict": True, "schema": person_schema}, + }, + }, + ) + + result = response.json() + + # Validate response format + content = result["choices"][0]["message"]["content"] + parsed_content = json.loads(content) + + assert "firstName" in parsed_content + assert "lastName" in parsed_content + assert "hobby" in parsed_content + assert "numCats" in parsed_content + assert isinstance(parsed_content["numCats"], int) + assert parsed_content["numCats"] >= 0 + assert result == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_complex(model_fixture, response_snapshot): + """Test complex JSON schema with nested objects and arrays.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "John Smith is 30 years old. He lives on Maple Street in Boston. 
He enjoys botany, astronomy, and solving mathematical puzzles.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": { + "name": "complex_person", + "strict": True, + "schema": complex_schema, + }, + }, + }, + ) + + result = response.json() + + # Validate response format + content = result["choices"][0]["message"]["content"] + parsed_content = json.loads(content) + + assert "name" in parsed_content + assert "age" in parsed_content + assert "hobbies" in parsed_content + assert "address" in parsed_content + assert "street" in parsed_content["address"] + assert "city" in parsed_content["address"] + assert isinstance(parsed_content["hobbies"], list) + assert len(parsed_content["hobbies"]) >= 1 + assert result == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_stream(model_fixture, response_snapshot): + """Test JSON schema validation with streaming.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "David is a person who likes to ride bicycles. 
He has 2 cats.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": {"name": "person", "strict": True, "schema": person_schema}, + }, + "stream": True, + }, + stream=True, + ) + + chunks = [] + content_generated = "" + + for line in response.iter_lines(): + if line: + # Remove the "data: " prefix and handle the special case of "[DONE]" + data = line.decode("utf-8") + if data.startswith("data: "): + data = data[6:] + if data != "[DONE]": + chunk = json.loads(data) + chunks.append(chunk) + if "choices" in chunk and len(chunk["choices"]) > 0: + if ( + "delta" in chunk["choices"][0] + and "content" in chunk["choices"][0]["delta"] + ): + content_generated += chunk["choices"][0]["delta"]["content"] + + # Validate the final assembled JSON + parsed_content = json.loads(content_generated) + assert "firstName" in parsed_content + assert "lastName" in parsed_content + assert "hobby" in parsed_content + assert "numCats" in parsed_content + assert isinstance(parsed_content["numCats"], int) + assert parsed_content["numCats"] >= 0 + assert chunks == response_snapshot diff --git a/router/src/lib.rs b/router/src/lib.rs index 3c1a01b3c..e5622fc22 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -222,6 +222,17 @@ impl HubProcessorConfig { } } +#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)] +#[cfg_attr(test, derive(PartialEq))] +struct JsonSchemaConfig { + /// Optional name identifier for the schema + #[serde(skip_serializing_if = "Option::is_none")] + name: Option<String>, + + /// The actual JSON schema definition + schema: serde_json::Value, +} + #[derive(Clone, Debug, Deserialize, ToSchema, Serialize)] #[cfg_attr(test, derive(PartialEq))] #[serde(tag = "type", content = "value")] @@ -234,8 +245,16 @@ pub(crate) enum GrammarType { #[serde(alias = "json_object")] #[schema(example = json !
({"properties": {"location":{"type": "string"}}}))] Json(serde_json::Value), + #[serde(rename = "regex")] Regex(String), + + /// A JSON Schema specification with additional metadata. + /// + /// Includes an optional name for the schema and the required schema definition; other fields such as `strict` are accepted but ignored. + #[serde(rename = "json_schema")] + #[schema(example = json ! ({"schema": {"properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}, "name": "person_info", "strict": true}))] + JsonSchema(JsonSchemaConfig), } #[derive(Clone, Debug, Serialize, ToSchema)] diff --git a/router/src/server.rs b/router/src/server.rs index 22fad04be..f5ae5fcd9 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -14,7 +14,6 @@ use crate::sagemaker::{ }; use crate::validation::ValidationError; use crate::vertex::vertex_compatibility; -use crate::ChatTokenizeResponse; use crate::{ usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, @@ -29,6 +28,7 @@ use crate::{ ChatRequest, Chunk, CompatGenerateRequest, Completion, CompletionComplete, CompletionFinal, CompletionRequest, CompletionType, DeltaToolCall, Function, Prompt, Tool, }; +use crate::{ChatTokenizeResponse, JsonSchemaConfig}; use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice}; use crate::{MessageBody, ModelInfo, ModelsInfo}; use async_stream::__private::AsyncStream; @@ -1362,6 +1362,7 @@ CompatGenerateRequest, SagemakerRequest, GenerateRequest, GrammarType, +JsonSchemaConfig, ChatRequest, Message, MessageContent, diff --git a/router/src/validation.rs b/router/src/validation.rs index b29391b77..28c7f2f8c 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -380,6 +380,28 @@ impl Validation { ValidGrammar::Regex(grammar_regex.to_string()) } + GrammarType::JsonSchema(schema_config) => { + // Extract the actual schema for validation + let json = &schema_config.schema;
+ + // Check if the json is a valid JSONSchema + jsonschema::draft202012::meta::validate(json) + .map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?; + + // The schema can be valid but lack properties. + // We need properties for the grammar to be successfully parsed in Python. + // Therefore, we must check and throw an error if properties are missing. + json.get("properties") + .ok_or(ValidationError::InvalidGrammar( + "Grammar must have a 'properties' field".to_string(), + ))?; + + // Do compilation in the router for performance + let grammar_regex = json_schema_to_regex(json, None, json) + .map_err(ValidationError::RegexFromSchema)?; + + ValidGrammar::Regex(grammar_regex.to_string()) + } GrammarType::Regex(regex) => ValidGrammar::Regex(regex), }; Some(valid_grammar)