From d9cac33231f8653c21e4faa5a85a366b859942cc Mon Sep 17 00:00:00 2001
From: Alex Weston <alexw@alkymi.io>
Date: Thu, 30 Jan 2025 14:03:54 -0500
Subject: [PATCH 1/8] Add json_schema alias for GrammarType

---
 router/src/lib.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index e8b8f663..48a8d606 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -223,7 +223,8 @@ pub(crate) enum GrammarType {
     ///
     /// JSON Schema is a declarative language that allows to annotate JSON documents
     /// with types and descriptions.
-    #[serde(rename = "json")]
+    #[serde(rename = "json_schema")]
+    #[serde(alias = "json")]
     #[serde(alias = "json_object")]
     #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
     Json(serde_json::Value),

From d278d3cf4c95d21f864ff774512e2ad4b06f53f2 Mon Sep 17 00:00:00 2001
From: Alex Weston <alexw@alkymi.io>
Date: Thu, 30 Jan 2025 14:11:05 -0500
Subject: [PATCH 2/8] Add tests for all aliases

---
 .../test_grammar_response_format_llama.py     | 61 ++++++++++++++-----
 1 file changed, 45 insertions(+), 16 deletions(-)

diff --git a/integration-tests/models/test_grammar_response_format_llama.py b/integration-tests/models/test_grammar_response_format_llama.py
index f2a8a96d..809dc3dd 100644
--- a/integration-tests/models/test_grammar_response_format_llama.py
+++ b/integration-tests/models/test_grammar_response_format_llama.py
@@ -29,26 +29,55 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh
         unit: str
         temperature: List[int]
 
+    json_payload={
+        "model": "tgi",
+        "messages": [
+            {
+                "role": "system",
+                "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
+            },
+            {
+                "role": "user",
+                "content": "What's the weather like the next 3 days in San Francisco, CA?",
+            },
+        ],
+        "seed": 42,
+        "max_tokens": 500,
+        "response_format": {"type": "json_object", "value": Weather.schema()},
+    }
     # send the request
     response = requests.post(
         f"{llama_grammar.base_url}/v1/chat/completions",
         headers=llama_grammar.headers,
-        json={
-            "model": "tgi",
-            "messages": [
-                {
-                    "role": "system",
-                    "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
-                },
-                {
-                    "role": "user",
-                    "content": "What's the weather like the next 3 days in San Francisco, CA?",
-                },
-            ],
-            "seed": 42,
-            "max_tokens": 500,
-            "response_format": {"type": "json_object", "value": Weather.schema()},
-        },
+        json=json_payload,
+    )
+
+    chat_completion = response.json()
+    called = chat_completion["choices"][0]["message"]["content"]
+
+    assert response.status_code == 200
+    assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
+    assert chat_completion == response_snapshot
+
+    json_payload["response_format"]["type"] = "json"
+    response = requests.post(
+        f"{llama_grammar.base_url}/v1/chat/completions",
+        headers=llama_grammar.headers,
+        json=json_payload,
+    )
+
+    chat_completion = response.json()
+    called = chat_completion["choices"][0]["message"]["content"]
+
+    assert response.status_code == 200
+    assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
+    assert chat_completion == response_snapshot
+
+    json_payload["response_format"]["type"] = "json_schema"
+    response = requests.post(
+        f"{llama_grammar.base_url}/v1/chat/completions",
+        headers=llama_grammar.headers,
+        json=json_payload,
     )
 
     chat_completion = response.json()

From 0928018ac2c072ae503f65bf9645b1fd0238a89f Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 20 Feb 2025 21:18:16 +0000
Subject: [PATCH 3/8] fix: various linter adjustments

---
 docs/openapi.json                                             | 4 ++--
 .../models/test_grammar_response_format_llama.py              | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index 85ca3f97..d5386699 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1746,7 +1746,7 @@
               "type": {
                 "type": "string",
                 "enum": [
-                  "json"
+                  "json_schema"
                 ]
               },
               "value": {
@@ -2406,4 +2406,4 @@
       "description": "Hugging Face Text Generation Inference API"
     }
   ]
-}
+}
\ No newline at end of file
diff --git a/integration-tests/models/test_grammar_response_format_llama.py b/integration-tests/models/test_grammar_response_format_llama.py
index 809dc3dd..d3ae735a 100644
--- a/integration-tests/models/test_grammar_response_format_llama.py
+++ b/integration-tests/models/test_grammar_response_format_llama.py
@@ -29,7 +29,7 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh
         unit: str
         temperature: List[int]
 
-    json_payload={
+    json_payload = {
         "model": "tgi",
         "messages": [
             {

From 5e6ac4ff630b0cdd62e0c65cb40ad8999ae50cfb Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 20 Feb 2025 18:39:15 -0500
Subject: [PATCH 4/8] fix: end-of-file-fixer lint

---
 docs/openapi.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index d5386699..bcaa8bfd 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -2406,4 +2406,4 @@
       "description": "Hugging Face Text Generation Inference API"
     }
   ]
-}
\ No newline at end of file
+}

From 92025e4b67802d3b694fcb0217a8c87c03fd7043 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 20 Feb 2025 23:45:15 +0000
Subject: [PATCH 5/8] fix: add test snapshots and avoid docs change

---
 docs/openapi.json                             |  4 ++--
 ..._grammar_response_format_llama_json.1.json | 23 +++++++++++++++++++
 ..._grammar_response_format_llama_json.2.json | 23 +++++++++++++++++++
 router/src/lib.rs                             |  4 ++--
 4 files changed, 50 insertions(+), 4 deletions(-)
 create mode 100644 integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json
 create mode 100644 integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json

diff --git a/docs/openapi.json b/docs/openapi.json
index bcaa8bfd..502f3564 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1746,7 +1746,7 @@
               "type": {
                 "type": "string",
                 "enum": [
-                  "json_schema"
+                  "json"
                 ]
               },
               "value": {
@@ -2406,4 +2406,4 @@
       "description": "Hugging Face Text Generation Inference API"
     }
   ]
-}
+}
\ No newline at end of file
diff --git a/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json
new file mode 100644
index 00000000..c4f804fc
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json
@@ -0,0 +1,23 @@
+{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }",
+        "role": "assistant"
+      }
+    }
+  ],
+  "created": 1740095072,
+  "id": "",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "chat.completion",
+  "system_fingerprint": "3.1.1-dev0-native",
+  "usage": {
+    "completion_tokens": 29,
+    "prompt_tokens": 135,
+    "total_tokens": 164
+  }
+}
diff --git a/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json
new file mode 100644
index 00000000..1be656ee
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json
@@ -0,0 +1,23 @@
+{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }",
+        "role": "assistant"
+      }
+    }
+  ],
+  "created": 1740095073,
+  "id": "",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "chat.completion",
+  "system_fingerprint": "3.1.1-dev0-native",
+  "usage": {
+    "completion_tokens": 29,
+    "prompt_tokens": 135,
+    "total_tokens": 164
+  }
+}
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 48a8d606..e3b8aee9 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -223,8 +223,8 @@ pub(crate) enum GrammarType {
     ///
     /// JSON Schema is a declarative language that allows to annotate JSON documents
     /// with types and descriptions.
-    #[serde(rename = "json_schema")]
-    #[serde(alias = "json")]
+    #[serde(rename = "json")]
+    #[serde(alias = "json_schema")]
     #[serde(alias = "json_object")]
     #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
     Json(serde_json::Value),

From 5e61553f486aa2502595c7cdcb8e2446871eb427 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 20 Feb 2025 23:46:04 +0000
Subject: [PATCH 6/8] fix: another end-of-file-fixer lint

---
 docs/openapi.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index 502f3564..85ca3f97 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -2406,4 +2406,4 @@
       "description": "Hugging Face Text Generation Inference API"
     }
   ]
-}
\ No newline at end of file
+}

From 71ef9da72c68e99e4f1d0c4f55a444e3d3dab74a Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Fri, 14 Mar 2025 18:13:03 +0000
Subject: [PATCH 7/8] feat: support json_schema grammar constraining and add
 tests

---
 .../test_json_schema_basic.json               |  23 +
 .../test_json_schema_complex.json             |  23 +
 .../test_json_schema_stream.json              | 743 ++++++++++++++++++
 .../models/test_json_schema_constrain.py      | 209 +++++
 router/src/lib.rs                             |  24 +-
 router/src/validation.rs                      |  26 +
 6 files changed, 1047 insertions(+), 1 deletion(-)
 create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json
 create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json
 create mode 100644 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json
 create mode 100644 integration-tests/models/test_json_schema_constrain.py

diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json
new file mode 100644
index 00000000..18e7242c
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json
@@ -0,0 +1,23 @@
+{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": "{\"firstName\":\"David\",\"lastName\":\"(Not provided)\",\"hobby\":\": Trees and nature\",\"numCats\":2}",
+        "role": "assistant"
+      }
+    }
+  ],
+  "created": 1741975610,
+  "id": "",
+  "model": "google/gemma-3-4b-it",
+  "object": "chat.completion",
+  "system_fingerprint": "3.2.1-dev0-native",
+  "usage": {
+    "completion_tokens": 37,
+    "prompt_tokens": 32,
+    "total_tokens": 69
+  }
+}
diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json
new file mode 100644
index 00000000..9efee329
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json
@@ -0,0 +1,23 @@
+{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": "{\"name\":\"John Smith\",\"age\":30,\"address\":{\"street\":\"Maple Street\",\"city\":\"Boston\"},\"hobbies\":[\"botany\",\", \",\"astronomy\",\", \",\"solving mathematical puzzles\"]}",
+        "role": "assistant"
+      }
+    }
+  ],
+  "created": 1741975505,
+  "id": "",
+  "model": "google/gemma-3-4b-it",
+  "object": "chat.completion",
+  "system_fingerprint": "3.2.1-dev0-native",
+  "usage": {
+    "completion_tokens": 50,
+    "prompt_tokens": 37,
+    "total_tokens": 87
+  }
+}
diff --git a/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json
new file mode 100644
index 00000000..943fad30
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json
@@ -0,0 +1,743 @@
+[
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "{",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "f",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "irs",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "t",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "Name",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\":",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "David",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\",",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "l",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "ast",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "Name",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\":",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "Unknown",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975615,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\",",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "h",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "obb",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "y",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\":",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\",",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " \\\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "riding",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " bicycles",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\\\",",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " \\\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "having",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": " cats",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\\\"\",",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\"",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "num",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "Cats",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "\":",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "2",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "}",
+          "role": "assistant"
+        },
+        "finish_reason": null,
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  },
+  {
+    "choices": [
+      {
+        "delta": {
+          "content": "",
+          "role": "assistant"
+        },
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null
+      }
+    ],
+    "created": 1741975616,
+    "id": "",
+    "model": "google/gemma-3-4b-it",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "3.2.1-dev0-native",
+    "usage": null
+  }
+]
diff --git a/integration-tests/models/test_json_schema_constrain.py b/integration-tests/models/test_json_schema_constrain.py
new file mode 100644
index 00000000..65b4a7b8
--- /dev/null
+++ b/integration-tests/models/test_json_schema_constrain.py
@@ -0,0 +1,209 @@
+import pytest
+import json
+import requests
+
+
+@pytest.fixture(scope="module")
+def model_handle(launcher):
+    """Fixture that launches google/gemma-3-4b-it with grammar support and yields the launcher handle."""
+    with launcher(
+        "google/gemma-3-4b-it",
+        num_shard=2,
+        disable_grammar_support=False,
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def model_fixture(model_handle):
+    await model_handle.health(300)
+    return model_handle.client
+
+
+# Sample JSON Schema for testing
+person_schema = {
+    "type": "object",
+    "$id": "https://example.com/person.schema.json",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "title": "Person",
+    "properties": {
+        "firstName": {
+            "type": "string",
+            "description": "The person's first name.",
+            "minLength": 4,
+        },
+        "lastName": {
+            "type": "string",
+            "description": "The person's last name.",
+            "minLength": 4,
+        },
+        "hobby": {
+            "description": "The person's hobby.",
+            "type": "string",
+            "minLength": 4,
+        },
+        "numCats": {
+            "description": "The number of cats the person has.",
+            "type": "integer",
+            "minimum": 0,
+        },
+    },
+    "required": ["firstName", "lastName", "hobby", "numCats"],
+}
+
+# More complex schema for testing nested objects and arrays
+complex_schema = {
+    "type": "object",
+    "properties": {
+        "name": {"type": "string"},
+        "age": {"type": "integer", "minimum": 0},
+        "address": {
+            "type": "object",
+            "properties": {
+                "street": {"type": "string"},
+                "city": {"type": "string"},
+                "postalCode": {"type": "string"},
+            },
+            "required": ["street", "city"],
+        },
+        "hobbies": {"type": "array", "items": {"type": "string"}, "minItems": 1},
+    },
+    "required": ["name", "age", "hobbies"],
+}
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_json_schema_basic(model_fixture, response_snapshot):
+    """Test basic JSON schema validation with the person schema."""
+    response = requests.post(
+        f"{model_fixture.base_url}/v1/chat/completions",
+        json={
+            "model": "tgi",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "David is a person who likes trees and nature. He enjoys studying math and science. He has 2 cats.",
+                },
+            ],
+            "seed": 42,
+            "temperature": 0.0,
+            "response_format": {
+                "type": "json_schema",
+                "value": {"name": "person", "strict": True, "schema": person_schema},
+            },
+        },
+    )
+
+    result = response.json()
+
+    # Validate response format
+    content = result["choices"][0]["message"]["content"]
+    parsed_content = json.loads(content)
+
+    assert "firstName" in parsed_content
+    assert "lastName" in parsed_content
+    assert "hobby" in parsed_content
+    assert "numCats" in parsed_content
+    assert isinstance(parsed_content["numCats"], int)
+    assert parsed_content["numCats"] >= 0
+    assert result == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_json_schema_complex(model_fixture, response_snapshot):
+    """Test complex JSON schema with nested objects and arrays."""
+    response = requests.post(
+        f"{model_fixture.base_url}/v1/chat/completions",
+        json={
+            "model": "tgi",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "John Smith is 30 years old. He lives on Maple Street in Boston. He enjoys botany, astronomy, and solving mathematical puzzles.",
+                },
+            ],
+            "seed": 42,
+            "temperature": 0.0,
+            "response_format": {
+                "type": "json_schema",
+                "value": {
+                    "name": "complex_person",
+                    "strict": True,
+                    "schema": complex_schema,
+                },
+            },
+        },
+    )
+
+    result = response.json()
+
+    # Validate response format
+    content = result["choices"][0]["message"]["content"]
+    parsed_content = json.loads(content)
+
+    assert "name" in parsed_content
+    assert "age" in parsed_content
+    assert "hobbies" in parsed_content
+    assert "address" in parsed_content
+    assert "street" in parsed_content["address"]
+    assert "city" in parsed_content["address"]
+    assert isinstance(parsed_content["hobbies"], list)
+    assert len(parsed_content["hobbies"]) >= 1
+    assert result == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_json_schema_stream(model_fixture, response_snapshot):
+    """Test JSON schema validation with streaming."""
+    response = requests.post(
+        f"{model_fixture.base_url}/v1/chat/completions",
+        json={
+            "model": "tgi",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "David is a person who likes to ride bicycles. He has 2 cats.",
+                },
+            ],
+            "seed": 42,
+            "temperature": 0.0,
+            "response_format": {
+                "type": "json_schema",
+                "value": {"name": "person", "strict": True, "schema": person_schema},
+            },
+            "stream": True,
+        },
+        stream=True,
+    )
+
+    chunks = []
+    content_generated = ""
+
+    for line in response.iter_lines():
+        if line:
+            # Strip the "data: " prefix; the "[DONE]" sentinel is not JSON, so skip it
+            data = line.decode("utf-8")
+            if data.startswith("data: "):
+                data = data[6:]
+                if data != "[DONE]":
+                    chunk = json.loads(data)
+                    chunks.append(chunk)
+                    if "choices" in chunk and len(chunk["choices"]) > 0:
+                        if (
+                            "delta" in chunk["choices"][0]
+                            and "content" in chunk["choices"][0]["delta"]
+                        ):
+                            content_generated += chunk["choices"][0]["delta"]["content"]
+
+    # Validate the final assembled JSON
+    parsed_content = json.loads(content_generated)
+    assert "firstName" in parsed_content
+    assert "lastName" in parsed_content
+    assert "hobby" in parsed_content
+    assert "numCats" in parsed_content
+    assert isinstance(parsed_content["numCats"], int)
+    assert parsed_content["numCats"] >= 0
+    assert chunks == response_snapshot
diff --git a/router/src/lib.rs b/router/src/lib.rs
index e3b8aee9..e1a9a839 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -215,6 +215,21 @@ impl HubProcessorConfig {
     }
 }
 
+#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)]
+#[cfg_attr(test, derive(PartialEq))]
+struct JsonSchemaConfig {
+    /// Optional name identifier for the schema
+    #[serde(skip_serializing_if = "Option::is_none")]
+    name: Option<String>,
+
+    /// Whether to enforce strict validation (optional)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    strict: Option<bool>,
+
+    /// The actual JSON schema definition
+    schema: serde_json::Value,
+}
+
 #[derive(Clone, Debug, Deserialize, ToSchema, Serialize)]
 #[cfg_attr(test, derive(PartialEq))]
 #[serde(tag = "type", content = "value")]
@@ -224,12 +239,19 @@ pub(crate) enum GrammarType {
     /// JSON Schema is a declarative language that allows to annotate JSON documents
     /// with types and descriptions.
     #[serde(rename = "json")]
-    #[serde(alias = "json_schema")]
     #[serde(alias = "json_object")]
     #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
     Json(serde_json::Value),
+
     #[serde(rename = "regex")]
     Regex(String),
+
+    /// A JSON Schema specification with additional metadata.
+    ///
+    /// Includes an optional name for the schema, an optional strict flag, and the required schema definition.
+    #[serde(rename = "json_schema")]
+    #[schema(example = json ! ({"schema": {"properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}, "name": "person_info", "strict": true}))]
+    JsonSchema(JsonSchemaConfig),
 }
 
 #[derive(Clone, Debug, Serialize, ToSchema)]
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 1119347d..625a4bdc 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -380,6 +380,32 @@ impl Validation {
 
                         ValidGrammar::Regex(grammar_regex.to_string())
                     }
+                    GrammarType::JsonSchema(schema_config) => {
+                        // Extract the actual schema for validation
+                        let json = &schema_config.schema;
+
+                        // Check if the json is a valid JSONSchema
+                        jsonschema::draft202012::meta::validate(json)
+                            .map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?;
+
+                        // The schema can be valid but lack properties.
+                        // We need properties for the grammar to be successfully parsed in Python.
+                        // Therefore, we must check and throw an error if properties are missing.
+                        json.get("properties")
+                            .ok_or(ValidationError::InvalidGrammar(
+                                "Grammar must have a 'properties' field".to_string(),
+                            ))?;
+
+                        // TODO:
+                        // Apply strictness if specified
+                        let _strict = schema_config.strict.unwrap_or(false);
+
+                        // Do compilation in the router for performance
+                        let grammar_regex = json_schema_to_regex(json, None, json)
+                            .map_err(ValidationError::RegexFromSchema)?;
+
+                        ValidGrammar::Regex(grammar_regex.to_string())
+                    }
                     GrammarType::Regex(regex) => ValidGrammar::Regex(regex),
                 };
                 Some(valid_grammar)

From 65c6008847b466b5b673c91dccd90d19913619f9 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Mon, 17 Mar 2025 14:51:33 +0000
Subject: [PATCH 8/8] fix: bump openapi doc with new grammar option

---
 docs/openapi.json        | 34 ++++++++++++++++++++++++++++++++++
 router/src/lib.rs        |  4 ----
 router/src/server.rs     |  3 ++-
 router/src/validation.rs |  4 ----
 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index 85ca3f97..2d3a023a 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1771,6 +1771,24 @@
                 "type": "string"
               }
             }
+          },
+          {
+            "type": "object",
+            "required": [
+              "type",
+              "value"
+            ],
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "json_schema"
+                ]
+              },
+              "value": {
+                "$ref": "#/components/schemas/JsonSchemaConfig"
+              }
+            }
           }
         ],
         "discriminator": {
@@ -1864,6 +1882,22 @@
           }
         }
       },
+      "JsonSchemaConfig": {
+        "type": "object",
+        "required": [
+          "schema"
+        ],
+        "properties": {
+          "name": {
+            "type": "string",
+            "description": "Optional name identifier for the schema",
+            "nullable": true
+          },
+          "schema": {
+            "description": "The actual JSON schema definition"
+          }
+        }
+      },
       "Message": {
         "allOf": [
           {
diff --git a/router/src/lib.rs b/router/src/lib.rs
index e1a9a839..e1ee0541 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -222,10 +222,6 @@ struct JsonSchemaConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     name: Option<String>,
 
-    /// Whether to enforce strict validation (optional)
-    #[serde(skip_serializing_if = "Option::is_none")]
-    strict: Option<bool>,
-
     /// The actual JSON schema definition
     schema: serde_json::Value,
 }
diff --git a/router/src/server.rs b/router/src/server.rs
index 45d2b9f3..1cf34546 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -13,7 +13,6 @@ use crate::sagemaker::{
 };
 use crate::validation::ValidationError;
 use crate::vertex::vertex_compatibility;
-use crate::ChatTokenizeResponse;
 use crate::{
     usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
     GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
@@ -28,6 +27,7 @@ use crate::{
     ChatRequest, Chunk, CompatGenerateRequest, Completion, CompletionComplete, CompletionFinal,
     CompletionRequest, CompletionType, DeltaToolCall, Function, Prompt, Tool,
 };
+use crate::{ChatTokenizeResponse, JsonSchemaConfig};
 use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice};
 use crate::{MessageBody, ModelInfo, ModelsInfo};
 use async_stream::__private::AsyncStream;
@@ -1339,6 +1339,7 @@ CompatGenerateRequest,
 SagemakerRequest,
 GenerateRequest,
 GrammarType,
+JsonSchemaConfig,
 ChatRequest,
 Message,
 MessageContent,
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 625a4bdc..3068b14b 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -396,10 +396,6 @@ impl Validation {
                                 "Grammar must have a 'properties' field".to_string(),
                             ))?;
 
-                        // TODO:
-                        // Apply strictness if specified
-                        let _strict = schema_config.strict.unwrap_or(false);
-
                         // Do compilation in the router for performance
                         let grammar_regex = json_schema_to_regex(json, None, json)
                             .map_err(ValidationError::RegexFromSchema)?;