From d9cac33231f8653c21e4faa5a85a366b859942cc Mon Sep 17 00:00:00 2001 From: Alex Weston Date: Thu, 30 Jan 2025 14:03:54 -0500 Subject: [PATCH 1/8] Add json_schema alias for GrammarType --- router/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/router/src/lib.rs b/router/src/lib.rs index e8b8f6632..48a8d6063 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -223,7 +223,8 @@ pub(crate) enum GrammarType { /// /// JSON Schema is a declarative language that allows to annotate JSON documents /// with types and descriptions. - #[serde(rename = "json")] + #[serde(rename = "json_schema")] + #[serde(alias = "json")] #[serde(alias = "json_object")] #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))] Json(serde_json::Value), From d278d3cf4c95d21f864ff774512e2ad4b06f53f2 Mon Sep 17 00:00:00 2001 From: Alex Weston Date: Thu, 30 Jan 2025 14:11:05 -0500 Subject: [PATCH 2/8] Add tests for all aliases --- .../test_grammar_response_format_llama.py | 61 ++++++++++++++----- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/integration-tests/models/test_grammar_response_format_llama.py b/integration-tests/models/test_grammar_response_format_llama.py index f2a8a96da..809dc3dd7 100644 --- a/integration-tests/models/test_grammar_response_format_llama.py +++ b/integration-tests/models/test_grammar_response_format_llama.py @@ -29,26 +29,55 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh unit: str temperature: List[int] + json_payload={ + "model": "tgi", + "messages": [ + { + "role": "system", + "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}", + }, + { + "role": "user", + "content": "What's the weather like the next 3 days in San Francisco, CA?", + }, + ], + "seed": 42, + "max_tokens": 500, + "response_format": {"type": "json_object", "value": Weather.schema()}, + } # send the request response = requests.post( f"{llama_grammar.base_url}/v1/chat/completions", headers=llama_grammar.headers, - json={ - "model": "tgi", - "messages": [ - { - "role": "system", - "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}", - }, - { - "role": "user", - "content": "What's the weather like the next 3 days in San Francisco, CA?", - }, - ], - "seed": 42, - "max_tokens": 500, - "response_format": {"type": "json_object", "value": Weather.schema()}, - }, + json=json_payload, + ) + + chat_completion = response.json() + called = chat_completion["choices"][0]["message"]["content"] + + assert response.status_code == 200 + assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }' + assert chat_completion == response_snapshot + + json_payload["response_format"]["type"] = "json" + response = requests.post( + f"{llama_grammar.base_url}/v1/chat/completions", + headers=llama_grammar.headers, + json=json_payload, + ) + + chat_completion = response.json() + called = chat_completion["choices"][0]["message"]["content"] + + assert response.status_code == 200 + assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }' + assert chat_completion == response_snapshot + + json_payload["response_format"]["type"] = "json_schema" + response = requests.post( + f"{llama_grammar.base_url}/v1/chat/completions", + headers=llama_grammar.headers, + json=json_payload, ) chat_completion = response.json() From 0928018ac2c072ae503f65bf9645b1fd0238a89f Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 20 Feb 2025 21:18:16 +0000 Subject: [PATCH 3/8] fix: various linter adjustments --- docs/openapi.json | 4 ++-- .../models/test_grammar_response_format_llama.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/openapi.json b/docs/openapi.json index 85ca3f977..d53866997 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -1746,7 +1746,7 @@ "type": { "type": "string", "enum": [ - "json" + "json_schema" ] }, "value": { @@ -2406,4 +2406,4 @@ "description": "Hugging Face Text Generation Inference API" } ] -} +} \ No newline at end of file diff --git a/integration-tests/models/test_grammar_response_format_llama.py b/integration-tests/models/test_grammar_response_format_llama.py index 809dc3dd7..d3ae735a7 100644 --- a/integration-tests/models/test_grammar_response_format_llama.py +++ b/integration-tests/models/test_grammar_response_format_llama.py @@ -29,7 +29,7 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh unit: str temperature: List[int] - json_payload={ + json_payload = { "model": "tgi", "messages": [ { From 5e6ac4ff630b0cdd62e0c65cb40ad8999ae50cfb Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 20 Feb 2025 18:39:15 -0500 Subject: [PATCH 4/8] fix: end-of-file-fixer lint --- docs/openapi.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/openapi.json b/docs/openapi.json index d53866997..bcaa8bfd8 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -2406,4 +2406,4 @@ "description": "Hugging Face Text Generation Inference API" } ] -} \ No newline at end of file +} From 92025e4b67802d3b694fcb0217a8c87c03fd7043 Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 20 Feb 2025 23:45:15 +0000 Subject: [PATCH 5/8] fix: add test snapshots and avoid docs change --- docs/openapi.json | 4 ++-- ..._grammar_response_format_llama_json.1.json | 23 +++++++++++++++++++ ..._grammar_response_format_llama_json.2.json | 23 +++++++++++++++++++ router/src/lib.rs | 4 ++-- 4 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json create mode 100644 integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json diff --git a/docs/openapi.json b/docs/openapi.json index bcaa8bfd8..502f35640 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -1746,7 +1746,7 @@ "type": { "type": "string", "enum": [ - "json_schema" + "json" ] }, "value": { @@ -2406,4 +2406,4 @@ "description": "Hugging Face Text Generation Inference API" } ] -} +} \ No newline at end of file diff --git a/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json new file mode 100644 index 000000000..c4f804fcf --- /dev/null +++ b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }", + "role": "assistant" + } + } + ], + "created": 1740095072, + "id": "", + "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "object": "chat.completion", + "system_fingerprint": "3.1.1-dev0-native", + "usage": { + "completion_tokens": 29, + "prompt_tokens": 135, + "total_tokens": 164 + } +} diff --git a/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json new file mode 100644 index 000000000..1be656eeb --- /dev/null +++ b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }", + "role": "assistant" + } + } + ], + "created": 1740095073, + "id": "", + "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "object": "chat.completion", + "system_fingerprint": "3.1.1-dev0-native", + "usage": { + "completion_tokens": 29, + "prompt_tokens": 135, + "total_tokens": 164 + } +} diff --git a/router/src/lib.rs b/router/src/lib.rs index 48a8d6063..e3b8aee91 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -223,8 +223,8 @@ pub(crate) enum GrammarType { /// /// JSON Schema is a declarative language that allows to annotate JSON documents /// with types and descriptions. - #[serde(rename = "json_schema")] - #[serde(alias = "json")] + #[serde(rename = "json")] + #[serde(alias = "json_schema")] #[serde(alias = "json_object")] #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))] Json(serde_json::Value), From 5e61553f486aa2502595c7cdcb8e2446871eb427 Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 20 Feb 2025 23:46:04 +0000 Subject: [PATCH 6/8] fix: another end-of-file-fixer lint --- docs/openapi.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/openapi.json b/docs/openapi.json index 502f35640..85ca3f977 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -2406,4 +2406,4 @@ "description": "Hugging Face Text Generation Inference API" } ] -} \ No newline at end of file +} From 71ef9da72c68e99e4f1d0c4f55a444e3d3dab74a Mon Sep 17 00:00:00 2001 From: drbh Date: Fri, 14 Mar 2025 18:13:03 +0000 Subject: [PATCH 7/8] feat: support json_schema grammar constraining and add tests --- .../test_json_schema_basic.json | 23 + .../test_json_schema_complex.json | 23 + .../test_json_schema_stream.json | 743 ++++++++++++++++++ .../models/test_json_schema_constrain.py | 209 +++++ router/src/lib.rs | 24 +- router/src/validation.rs | 26 + 6 files changed, 1047 insertions(+), 1 deletion(-) create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json create mode 100644 integration-tests/models/test_json_schema_constrain.py diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json new file mode 100644 index 000000000..18e7242c0 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"firstName\":\"David\",\"lastName\":\"(Not provided)\",\"hobby\":\": Trees and nature\",\"numCats\":2}", + "role": "assistant" + } + } + ], + "created": 1741975610, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 37, + "prompt_tokens": 32, + "total_tokens": 69 + } +} diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json new file mode 100644 index 000000000..9efee3299 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json @@ -0,0 +1,23 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"name\":\"John Smith\",\"age\":30,\"address\":{\"street\":\"Maple Street\",\"city\":\"Boston\"},\"hobbies\":[\"botany\",\", \",\"astronomy\",\", \",\"solving mathematical puzzles\"]}", + "role": "assistant" + } + } + ], + "created": 1741975505, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 50, + "prompt_tokens": 37, + "total_tokens": 87 + } +} diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json new file mode 100644 index 000000000..943fad302 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json @@ -0,0 +1,743 @@ +[ + { + "choices": [ + { + "delta": { + "content": "{", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "f", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "irs", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "t", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Name", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "David", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "l", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "ast", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Name", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Unknown", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975615, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "h", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "obb", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "y", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " \\\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "riding", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " bicycles", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\\\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " \\\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "having", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": " cats", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\\\"\",", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\"", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "num", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "Cats", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "\":", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "2", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "}", + "role": "assistant" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + }, + { + "choices": [ + { + "delta": { + "content": "", + "role": "assistant" + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1741975616, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion.chunk", + "system_fingerprint": "3.2.1-dev0-native", + "usage": null + } +] diff --git a/integration-tests/models/test_json_schema_constrain.py b/integration-tests/models/test_json_schema_constrain.py new file mode 100644 index 000000000..65b4a7b8e --- /dev/null +++ b/integration-tests/models/test_json_schema_constrain.py @@ -0,0 +1,209 @@ +import pytest +import json +import requests + + +@pytest.fixture(scope="module") +def model_handle(launcher): + """Fixture to provide the base URL for API calls.""" + with launcher( + "google/gemma-3-4b-it", + num_shard=2, + disable_grammar_support=False, + ) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def model_fixture(model_handle): + await model_handle.health(300) + return model_handle.client + + +# Sample JSON Schema for testing +person_schema = { + "type": "object", + "$id": "https://example.com/person.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Person", + "properties": { + "firstName": { + "type": "string", + "description": "The person's first name.", + "minLength": 4, + }, + "lastName": { + "type": "string", + "description": "The person's last name.", + "minLength": 4, + }, + "hobby": { + "description": "The person's hobby.", + "type": "string", + "minLength": 4, + }, + "numCats": { + "description": "The number of cats the person has.", + "type": "integer", + "minimum": 0, + }, + }, + "required": ["firstName", "lastName", "hobby", "numCats"], +} + +# More complex schema for testing nested objects and arrays +complex_schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + "postalCode": {"type": "string"}, + }, + "required": ["street", "city"], + }, + "hobbies": {"type": "array", "items": {"type": "string"}, "minItems": 1}, + }, + "required": ["name", "age", "hobbies"], +} + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_basic(model_fixture, response_snapshot): + """Test basic JSON schema validation with the person schema.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "David is a person who likes trees and nature. He enjoys studying math and science. He has 2 cats.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": {"name": "person", "strict": True, "schema": person_schema}, + }, + }, + ) + + result = response.json() + + # Validate response format + content = result["choices"][0]["message"]["content"] + parsed_content = json.loads(content) + + assert "firstName" in parsed_content + assert "lastName" in parsed_content + assert "hobby" in parsed_content + assert "numCats" in parsed_content + assert isinstance(parsed_content["numCats"], int) + assert parsed_content["numCats"] >= 0 + assert result == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_complex(model_fixture, response_snapshot): + """Test complex JSON schema with nested objects and arrays.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "John Smith is 30 years old. He lives on Maple Street in Boston. He enjoys botany, astronomy, and solving mathematical puzzles.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": { + "name": "complex_person", + "strict": True, + "schema": complex_schema, + }, + }, + }, + ) + + result = response.json() + + # Validate response format + content = result["choices"][0]["message"]["content"] + parsed_content = json.loads(content) + + assert "name" in parsed_content + assert "age" in parsed_content + assert "hobbies" in parsed_content + assert "address" in parsed_content + assert "street" in parsed_content["address"] + assert "city" in parsed_content["address"] + assert isinstance(parsed_content["hobbies"], list) + assert len(parsed_content["hobbies"]) >= 1 + assert result == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_json_schema_stream(model_fixture, response_snapshot): + """Test JSON schema validation with streaming.""" + response = requests.post( + f"{model_fixture.base_url}/v1/chat/completions", + json={ + "model": "tgi", + "messages": [ + { + "role": "user", + "content": "David is a person who likes to ride bicycles. He has 2 cats.", + }, + ], + "seed": 42, + "temperature": 0.0, + "response_format": { + "type": "json_schema", + "value": {"name": "person", "strict": True, "schema": person_schema}, + }, + "stream": True, + }, + stream=True, + ) + + chunks = [] + content_generated = "" + + for line in response.iter_lines(): + if line: + # Remove the "data: " prefix and handle the special case of "[DONE]" + data = line.decode("utf-8") + if data.startswith("data: "): + data = data[6:] + if data != "[DONE]": + chunk = json.loads(data) + chunks.append(chunk) + if "choices" in chunk and len(chunk["choices"]) > 0: + if ( + "delta" in chunk["choices"][0] + and "content" in chunk["choices"][0]["delta"] + ): + content_generated += chunk["choices"][0]["delta"]["content"] + + # Validate the final assembled JSON + parsed_content = json.loads(content_generated) + assert "firstName" in parsed_content + assert "lastName" in parsed_content + assert "hobby" in parsed_content + assert "numCats" in parsed_content + assert isinstance(parsed_content["numCats"], int) + assert parsed_content["numCats"] >= 0 + assert chunks == response_snapshot diff --git a/router/src/lib.rs b/router/src/lib.rs index e3b8aee91..e1a9a839a 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -215,6 +215,21 @@ impl HubProcessorConfig { } } +#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)] +#[cfg_attr(test, derive(PartialEq))] +struct JsonSchemaConfig { + /// Optional name identifier for the schema + #[serde(skip_serializing_if = "Option::is_none")] + name: Option, + + /// Whether to enforce strict validation (optional) + #[serde(skip_serializing_if = "Option::is_none")] + strict: Option, + + /// The actual JSON schema definition + schema: serde_json::Value, +} + #[derive(Clone, Debug, Deserialize, ToSchema, Serialize)] #[cfg_attr(test, derive(PartialEq))] #[serde(tag = "type", content = "value")] @@ -224,12 +239,19 @@ pub(crate) enum GrammarType { /// JSON Schema is a declarative language that allows to annotate JSON documents /// with types and descriptions. #[serde(rename = "json")] - #[serde(alias = "json_schema")] #[serde(alias = "json_object")] #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))] Json(serde_json::Value), + #[serde(rename = "regex")] Regex(String), + + /// A JSON Schema specification with additional metadata. + /// + /// Includes an optional name for the schema, an optional strict flag, and the required schema definition. + #[serde(rename = "json_schema")] + #[schema(example = json ! ({"schema": {"properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}, "name": "person_info", "strict": true}))] + JsonSchema(JsonSchemaConfig), } #[derive(Clone, Debug, Serialize, ToSchema)] diff --git a/router/src/validation.rs b/router/src/validation.rs index 1119347dc..625a4bdcd 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -380,6 +380,32 @@ impl Validation { ValidGrammar::Regex(grammar_regex.to_string()) } + GrammarType::JsonSchema(schema_config) => { + // Extract the actual schema for validation + let json = &schema_config.schema; + + // Check if the json is a valid JSONSchema + jsonschema::draft202012::meta::validate(json) + .map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?; + + // The schema can be valid but lack properties. + // We need properties for the grammar to be successfully parsed in Python. + // Therefore, we must check and throw an error if properties are missing. + json.get("properties") + .ok_or(ValidationError::InvalidGrammar( + "Grammar must have a 'properties' field".to_string(), + ))?; + + // TODO: + // Apply strictness if specified + let _strict = schema_config.strict.unwrap_or(false); + + // Do compilation in the router for performance + let grammar_regex = json_schema_to_regex(json, None, json) + .map_err(ValidationError::RegexFromSchema)?; + + ValidGrammar::Regex(grammar_regex.to_string()) + } GrammarType::Regex(regex) => ValidGrammar::Regex(regex), }; Some(valid_grammar) From 65c6008847b466b5b673c91dccd90d19913619f9 Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 17 Mar 2025 14:51:33 +0000 Subject: [PATCH 8/8] fix: bump openapi doc with new grammar option --- docs/openapi.json | 34 ++++++++++++++++++++++++++++++++++ router/src/lib.rs | 4 ---- router/src/server.rs | 3 ++- router/src/validation.rs | 4 ---- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/docs/openapi.json b/docs/openapi.json index 85ca3f977..2d3a023a5 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -1771,6 +1771,24 @@ "type": "string" } } + }, + { + "type": "object", + "required": [ + "type", + "value" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "json_schema" + ] + }, + "value": { + "$ref": "#/components/schemas/JsonSchemaConfig" + } + } } ], "discriminator": { @@ -1864,6 +1882,22 @@ } } }, + "JsonSchemaConfig": { + "type": "object", + "required": [ + "schema" + ], + "properties": { + "name": { + "type": "string", + "description": "Optional name identifier for the schema", + "nullable": true + }, + "schema": { + "description": "The actual JSON schema definition" + } + } + }, "Message": { "allOf": [ { diff --git a/router/src/lib.rs b/router/src/lib.rs index e1a9a839a..e1ee05415 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -222,10 +222,6 @@ struct JsonSchemaConfig { #[serde(skip_serializing_if = "Option::is_none")] name: Option, - /// Whether to enforce strict validation (optional) - #[serde(skip_serializing_if = "Option::is_none")] - strict: Option, - /// The actual JSON schema definition schema: serde_json::Value, } diff --git a/router/src/server.rs b/router/src/server.rs index 45d2b9f3c..1cf345460 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -13,7 +13,6 @@ use crate::sagemaker::{ }; use crate::validation::ValidationError; use crate::vertex::vertex_compatibility; -use crate::ChatTokenizeResponse; use crate::{ usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, @@ -28,6 +27,7 @@ use crate::{ ChatRequest, Chunk, CompatGenerateRequest, Completion, CompletionComplete, CompletionFinal, CompletionRequest, CompletionType, DeltaToolCall, Function, Prompt, Tool, }; +use crate::{ChatTokenizeResponse, JsonSchemaConfig}; use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice}; use crate::{MessageBody, ModelInfo, ModelsInfo}; use async_stream::__private::AsyncStream; @@ -1339,6 +1339,7 @@ CompatGenerateRequest, SagemakerRequest, GenerateRequest, GrammarType, +JsonSchemaConfig, ChatRequest, Message, MessageContent, diff --git a/router/src/validation.rs b/router/src/validation.rs index 625a4bdcd..3068b14b8 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -396,10 +396,6 @@ impl Validation { "Grammar must have a 'properties' field".to_string(), ))?; - // TODO: - // Apply strictness if specified - let _strict = schema_config.strict.unwrap_or(false); - // Do compilation in the router for performance let grammar_regex = json_schema_to_regex(json, None, json) .map_err(ValidationError::RegexFromSchema)?;