From 71ef9da72c68e99e4f1d0c4f55a444e3d3dab74a Mon Sep 17 00:00:00 2001 From: drbh Date: Fri, 14 Mar 2025 18:13:03 +0000 Subject: [PATCH] feat: support json_schema grammar constraining and add tests --- .../test_json_schema_basic.json | 23 + .../test_json_schema_complex.json | 23 + .../test_json_schema_stream.json | 743 ++++++++++++++++++ .../models/test_json_schema_constrain.py | 209 +++++ router/src/lib.rs | 24 +- router/src/validation.rs | 26 + 6 files changed, 1047 insertions(+), 1 deletion(-) create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json create mode 100644 integration-tests/models/test_json_schema_constrain.py diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json new file mode 100644 index 000000000..18e7242c0 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"firstName\":\"David\",\"lastName\":\"(Not provided)\",\"hobby\":\": Trees and nature\",\"numCats\":2}", + "role": "assistant" + } + } + ], + "created": 1741975610, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 37, + "prompt_tokens": 32, + "total_tokens": 69 + } +} diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json 
new file mode 100644 index 000000000..9efee3299 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"name\":\"John Smith\",\"age\":30,\"address\":{\"street\":\"Maple Street\",\"city\":\"Boston\"},\"hobbies\":[\"botany\",\", \",\"astronomy\",\", \",\"solving mathematical puzzles\"]}", + "role": "assistant" + } + } + ], + "created": 1741975505, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 50, + "prompt_tokens": 37, + "total_tokens": 87 + } +} diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json new file mode 100644 index 000000000..943fad302 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json @@ -0,0 +1,743 @@ +[ + { + "choices": [ + { + "delta": { + "content": "{", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "f", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": 
"google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "irs", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "t", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Name", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "David", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": 
"chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "l", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ast", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Name", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": 
"3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Unknown", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "h", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "obb", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + 
"choices": [ + { + "delta": { + "content": "y", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " \\\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "riding", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " bicycles", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + 
"content": "\\\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " \\\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "having", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " cats", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\\\"\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "num", + "role": 
"assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Cats", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "2", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "}", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "", + "role": "assistant" + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/test_json_schema_constrain.py 
b/integration-tests/models/test_json_schema_constrain.py new file mode 100644 index 000000000..65b4a7b8e --- /dev/null +++ b/integration-tests/models/test_json_schema_constrain.py @@ -0,0 +1,209 @@ +import pytest +import json +import requests + + +@pytest.fixture(scope="module") +def model_handle(launcher): + """Fixture to provide the base URL for API calls.""" + with launcher( + "google/gemma-3-4b-it", + num_shard=2, + disable_grammar_support=False, + ) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def model_fixture(model_handle): + await model_handle.health(300) + return model_handle.client + + +# Sample JSON Schema for testing +person_schema = { + "type": "object", + "$id": "https://example.com/person.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Person", + "properties": { + "firstName": { + "type": "string", + "description": "The person's first name.", + "minLength": 4, + }, + "lastName": { + "type": "string", + "description": "The person's last name.", + "minLength": 4, + }, + "hobby": { + "description": "The person's hobby.", + "type": "string", + "minLength": 4, + }, + "numCats": { + "description": "The number of cats the person has.", + "type": "integer", + "minimum": 0, + }, + }, + "required": ["firstName", "lastName", "hobby", "numCats"], +} + +# More complex schema for testing nested objects and arrays +complex_schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + "postalCode": {"type": "string"}, + }, + "required": ["street", "city"], + }, + "hobbies": {"type": "array", "items": {"type": "string"}, "minItems": 1}, + }, + "required": ["name", "age", "hobbies"], +} + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_basic(model_fixture, response_snapshot): + """Test basic JSON schema 
validation with the person schema.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "David is a person who likes trees and nature. He enjoys studying math and science. He has 2 cats.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": {"name": "person", "strict": True, "schema": person_schema}, + }, + }, + ) + + result = response.json() + + # Validate response format + content = result["choices"][0]["message"]["content"] + parsed_content = json.loads(content) + + assert "firstName" in parsed_content + assert "lastName" in parsed_content + assert "hobby" in parsed_content + assert "numCats" in parsed_content + assert isinstance(parsed_content["numCats"], int) + assert parsed_content["numCats"] >= 0 + assert result == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_complex(model_fixture, response_snapshot): + """Test complex JSON schema with nested objects and arrays.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "John Smith is 30 years old. He lives on Maple Street in Boston. 
He enjoys botany, astronomy, and solving mathematical puzzles.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": { + "name": "complex_person", + "strict": True, + "schema": complex_schema, + }, + }, + }, + ) + + result = response.json() + + # Validate response format + content = result["choices"][0]["message"]["content"] + parsed_content = json.loads(content) + + assert "name" in parsed_content + assert "age" in parsed_content + assert "hobbies" in parsed_content + assert "address" in parsed_content + assert "street" in parsed_content["address"] + assert "city" in parsed_content["address"] + assert isinstance(parsed_content["hobbies"], list) + assert len(parsed_content["hobbies"]) >= 1 + assert result == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_stream(model_fixture, response_snapshot): + """Test JSON schema validation with streaming.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "David is a person who likes to ride bicycles. 
He has 2 cats.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": {"name": "person", "strict": True, "schema": person_schema}, + }, + "stream": True, + }, + stream=True, + ) + + chunks = [] + content_generated = "" + + for line in response.iter_lines(): + if line: + # Remove the "data: " prefix and handle the special case of "[DONE]" + data = line.decode("utf-8") + if data.startswith("data: "): + data = data[6:] + if data != "[DONE]": + chunk = json.loads(data) + chunks.append(chunk) + if "choices" in chunk and len(chunk["choices"]) > 0: + if ( + "delta" in chunk["choices"][0] + and "content" in chunk["choices"][0]["delta"] + ): + content_generated += chunk["choices"][0]["delta"]["content"] + + # Validate the final assembled JSON + parsed_content = json.loads(content_generated) + assert "firstName" in parsed_content + assert "lastName" in parsed_content + assert "hobby" in parsed_content + assert "numCats" in parsed_content + assert isinstance(parsed_content["numCats"], int) + assert parsed_content["numCats"] >= 0 + assert chunks == response_snapshot diff --git a/router/src/lib.rs b/router/src/lib.rs index e3b8aee91..e1a9a839a 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -215,6 +215,21 @@ impl HubProcessorConfig { } } +#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)] +#[cfg_attr(test, derive(PartialEq))] +struct JsonSchemaConfig { + /// Optional name identifier for the schema + #[serde(skip_serializing_if = "Option::is_none")] + name: Option<String>, + + /// Whether to enforce strict validation (optional) + #[serde(skip_serializing_if = "Option::is_none")] + strict: Option<bool>, + + /// The actual JSON schema definition + schema: serde_json::Value, +} + #[derive(Clone, Debug, Deserialize, ToSchema, Serialize)] #[cfg_attr(test, derive(PartialEq))] #[serde(tag = "type", content = "value")] @@ -224,12 +239,19 @@ pub(crate) enum GrammarType { /// JSON Schema is a declarative language that allows to
annotate JSON documents /// with types and descriptions. #[serde(rename = "json")] - #[serde(alias = "json_schema")] #[serde(alias = "json_object")] #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))] Json(serde_json::Value), + #[serde(rename = "regex")] Regex(String), + + /// A JSON Schema specification with additional metadata. + /// + /// Includes an optional name for the schema, an optional strict flag, and the required schema definition. + #[serde(rename = "json_schema")] + #[schema(example = json ! ({"schema": {"properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}, "name": "person_info", "strict": true}))] + JsonSchema(JsonSchemaConfig), } #[derive(Clone, Debug, Serialize, ToSchema)] diff --git a/router/src/validation.rs b/router/src/validation.rs index 1119347dc..625a4bdcd 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -380,6 +380,32 @@ impl Validation { ValidGrammar::Regex(grammar_regex.to_string()) } + GrammarType::JsonSchema(schema_config) => { + // Extract the actual schema for validation + let json = &schema_config.schema; + + // Check if the json is a valid JSONSchema + jsonschema::draft202012::meta::validate(json) + .map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?; + + // The schema can be valid but lack properties. + // We need properties for the grammar to be successfully parsed in Python. + // Therefore, we must check and throw an error if properties are missing. 
+ json.get("properties") + .ok_or(ValidationError::InvalidGrammar( + "Grammar must have a 'properties' field".to_string(), + ))?; + + // TODO: + // Apply strictness if specified + let _strict = schema_config.strict.unwrap_or(false); + + // Do compilation in the router for performance + let grammar_regex = json_schema_to_regex(json, None, json) + .map_err(ValidationError::RegexFromSchema)?; + + ValidGrammar::Regex(grammar_regex.to_string()) + } GrammarType::Regex(regex) => ValidGrammar::Regex(regex), }; Some(valid_grammar)