feat: support json_schema grammar constraining and add tests

This commit is contained in:
drbh 2025-03-14 18:13:03 +00:00
parent 5e61553f48
commit 71ef9da72c
6 changed files with 1047 additions and 1 deletions

View File

@ -0,0 +1,23 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"firstName\":\"David\",\"lastName\":\"(Not provided)\",\"hobby\":\": Trees and nature\",\"numCats\":2}",
"role": "assistant"
}
}
],
"created": 1741975610,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion",
"system_fingerprint": "3.2.1-dev0-native",
"usage": {
"completion_tokens": 37,
"prompt_tokens": 32,
"total_tokens": 69
}
}

View File

@ -0,0 +1,23 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"name\":\"John Smith\",\"age\":30,\"address\":{\"street\":\"Maple Street\",\"city\":\"Boston\"},\"hobbies\":[\"botany\",\", \",\"astronomy\",\", \",\"solving mathematical puzzles\"]}",
"role": "assistant"
}
}
],
"created": 1741975505,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion",
"system_fingerprint": "3.2.1-dev0-native",
"usage": {
"completion_tokens": 50,
"prompt_tokens": 37,
"total_tokens": 87
}
}

View File

@ -0,0 +1,743 @@
[
{
"choices": [
{
"delta": {
"content": "{",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "f",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "irs",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "t",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "Name",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\":",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "David",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\",",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "l",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "ast",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "Name",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\":",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "Unknown",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975615,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\",",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "h",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "obb",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "y",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\":",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\",",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": " \\\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "riding",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": " bicycles",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\\\",",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": " \\\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "having",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": " cats",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\\\"\",",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\"",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "num",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "Cats",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "\":",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "2",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "}",
"role": "assistant"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
},
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1741975616,
"id": "",
"model": "google/gemma-3-4b-it",
"object": "chat.completion.chunk",
"system_fingerprint": "3.2.1-dev0-native",
"usage": null
}
]

View File

@ -0,0 +1,209 @@
import pytest
import json
import requests
@pytest.fixture(scope="module")
def model_handle(launcher):
    """Launch `google/gemma-3-4b-it` through `launcher` and yield the handle.

    The server is started with two shards and with grammar support left
    enabled (`disable_grammar_support=False`).  The handle is yielded from
    inside the launcher's context manager, so the launcher's cleanup runs
    once the module-scoped fixture is torn down.
    """
    launch_options = {"num_shard": 2, "disable_grammar_support": False}
    with launcher("google/gemma-3-4b-it", **launch_options) as server:
        yield server
@pytest.fixture(scope="module")
async def model_fixture(model_handle):
    """Wait for the launched server's health check, then return its client."""
    # 300 is handed straight to `health()`; presumably a timeout in seconds
    # -- TODO confirm against the launcher implementation.
    await model_handle.health(300)
    client = model_handle.client
    return client
# Flat "Person" JSON Schema (draft 2020-12) used by the basic and streaming
# tests.  All four properties are required; the three string properties must
# be at least four characters long and `numCats` must be a non-negative integer.
person_schema = {
    "type": "object",
    "$id": "https://example.com/person.schema.json",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "Person",
    "properties": {
        "firstName": {
            "type": "string",
            "description": "The person's first name.",
            "minLength": 4,
        },
        "lastName": {
            "type": "string",
            "description": "The person's last name.",
            "minLength": 4,
        },
        "hobby": {
            "description": "The person's hobby.",
            "type": "string",
            "minLength": 4,
        },
        "numCats": {
            "description": "The number of cats the person has.",
            "type": "integer",
            "minimum": 0,
        },
    },
    "required": ["firstName", "lastName", "hobby", "numCats"],
}
# Schema exercising a nested object (`address`) and an array (`hobbies`).
# Note that `address` is NOT listed in the top-level `required` array; only
# `street` and `city` are required once an `address` object is present.
complex_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimum": 0},
        "address": {
            "type": "object",
            "properties": {
                "street": {"type": "string"},
                "city": {"type": "string"},
                "postalCode": {"type": "string"},
            },
            "required": ["street", "city"],
        },
        "hobbies": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
        },
    },
    "required": ["name", "age", "hobbies"],
}
@pytest.mark.asyncio
@pytest.mark.private
async def test_json_schema_basic(model_fixture, response_snapshot):
    """Check that a `json_schema` response format constrains a chat completion.

    Posts a single-turn request to `/v1/chat/completions` with `person_schema`
    as the grammar, then verifies that the returned message content parses as
    JSON, carries the four person keys with a non-negative integer `numCats`,
    and that the whole response equals the stored snapshot.
    """
    response = requests.post(
        f"{model_fixture.base_url}/v1/chat/completions",
        json={
            "model": "tgi",
            "messages": [
                {
                    "role": "user",
                    "content": "David is a person who likes trees and nature. He enjoys studying math and science. He has 2 cats.",
                },
            ],
            "seed": 42,
            "temperature": 0.0,
            "response_format": {
                "type": "json_schema",
                "value": {"name": "person", "strict": True, "schema": person_schema},
            },
        },
    )
    # Surface the server's error body when the request is rejected, instead
    # of failing later with an opaque KeyError on `result["choices"]`.
    assert response.status_code == 200, response.text
    result = response.json()

    # Validate response format
    content = result["choices"][0]["message"]["content"]
    parsed_content = json.loads(content)

    assert "firstName" in parsed_content
    assert "lastName" in parsed_content
    assert "hobby" in parsed_content
    assert "numCats" in parsed_content
    assert isinstance(parsed_content["numCats"], int)
    assert parsed_content["numCats"] >= 0
    assert result == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_json_schema_complex(model_fixture, response_snapshot):
    """Check `json_schema` constraining with a nested object and an array.

    Posts a single-turn request to `/v1/chat/completions` with
    `complex_schema` as the grammar, then verifies the structure of the
    generated JSON and compares the whole response with the stored snapshot.
    """
    response = requests.post(
        f"{model_fixture.base_url}/v1/chat/completions",
        json={
            "model": "tgi",
            "messages": [
                {
                    "role": "user",
                    "content": "John Smith is 30 years old. He lives on Maple Street in Boston. He enjoys botany, astronomy, and solving mathematical puzzles.",
                },
            ],
            "seed": 42,
            "temperature": 0.0,
            "response_format": {
                "type": "json_schema",
                "value": {
                    "name": "complex_person",
                    "strict": True,
                    "schema": complex_schema,
                },
            },
        },
    )
    # Surface the server's error body when the request is rejected, instead
    # of failing later with an opaque KeyError on `result["choices"]`.
    assert response.status_code == 200, response.text
    result = response.json()

    # Validate response format
    content = result["choices"][0]["message"]["content"]
    parsed_content = json.loads(content)

    assert "name" in parsed_content
    assert "age" in parsed_content
    assert "hobbies" in parsed_content
    # `address` is not in the schema's top-level `required` list, so these
    # three checks pin what the model produced for this prompt rather than
    # something the grammar alone guarantees.
    assert "address" in parsed_content
    assert "street" in parsed_content["address"]
    assert "city" in parsed_content["address"]
    assert isinstance(parsed_content["hobbies"], list)
    assert len(parsed_content["hobbies"]) >= 1
    assert result == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_json_schema_stream(model_fixture, response_snapshot):
    """Check `json_schema` constraining when the completion is streamed.

    Posts a streaming request to `/v1/chat/completions` with `person_schema`
    as the grammar, collects every decoded chunk, concatenates the
    `delta.content` pieces, and verifies that the assembled text is JSON with
    the four person keys.  The list of chunks is compared with the stored
    snapshot.
    """
    chunks = []
    content_generated = ""

    # The context manager releases the streamed connection even when an
    # assertion or a JSON decoding error interrupts the loop.
    with requests.post(
        f"{model_fixture.base_url}/v1/chat/completions",
        json={
            "model": "tgi",
            "messages": [
                {
                    "role": "user",
                    "content": "David is a person who likes to ride bicycles. He has 2 cats.",
                },
            ],
            "seed": 42,
            "temperature": 0.0,
            "response_format": {
                "type": "json_schema",
                "value": {"name": "person", "strict": True, "schema": person_schema},
            },
            "stream": True,
        },
        stream=True,
    ) as response:
        # Surface the server's error body when the request is rejected,
        # instead of failing later while decoding a non-event payload.
        assert response.status_code == 200, response.text

        for line in response.iter_lines():
            if not line:
                continue
            data = line.decode("utf-8")
            # Only "data: " lines carry a payload; anything else is skipped.
            if not data.startswith("data: "):
                continue
            data = data[6:]
            # "[DONE]" is the end-of-stream sentinel, not a JSON chunk.
            if data == "[DONE]":
                continue

            chunk = json.loads(data)
            chunks.append(chunk)

            choices = chunk.get("choices") or []
            if choices:
                delta = choices[0].get("delta") or {}
                # A missing or null `content` contributes nothing instead of
                # raising TypeError on string concatenation.
                content_generated += delta.get("content") or ""

    # Validate the final assembled JSON
    parsed_content = json.loads(content_generated)

    assert "firstName" in parsed_content
    assert "lastName" in parsed_content
    assert "hobby" in parsed_content
    assert "numCats" in parsed_content
    assert isinstance(parsed_content["numCats"], int)
    assert parsed_content["numCats"] >= 0
    assert chunks == response_snapshot

View File

@ -215,6 +215,21 @@ impl HubProcessorConfig {
} }
} }
// Value carried by the `GrammarType::JsonSchema` variant: a JSON Schema
// document plus two optional metadata fields.
//
// Plain `//` comments are used for these notes on purpose: `///` doc comments
// on a `ToSchema` type presumably end up in the generated OpenAPI
// description -- confirm before promoting any of them to doc comments.
#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)]
#[cfg_attr(test, derive(PartialEq))]
struct JsonSchemaConfig {
    /// Optional name identifier for the schema
    // Left out of the serialized form entirely when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    name: Option<String>,

    /// Whether to enforce strict validation (optional)
    // Left out of the serialized form entirely when it is `None`.
    // NOTE(review): the validation code in this change reads this flag into
    // an unused `_strict` binding behind a TODO, so it has no effect yet.
    #[serde(skip_serializing_if = "Option::is_none")]
    strict: Option<bool>,

    /// The actual JSON schema definition
    // The only field without an `Option` wrapper; kept as an untyped JSON value.
    schema: serde_json::Value,
}
#[derive(Clone, Debug, Deserialize, ToSchema, Serialize)] #[derive(Clone, Debug, Deserialize, ToSchema, Serialize)]
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[serde(tag = "type", content = "value")] #[serde(tag = "type", content = "value")]
@ -224,12 +239,19 @@ pub(crate) enum GrammarType {
/// JSON Schema is a declarative language that allows to annotate JSON documents /// JSON Schema is a declarative language that allows to annotate JSON documents
/// with types and descriptions. /// with types and descriptions.
#[serde(rename = "json")] #[serde(rename = "json")]
#[serde(alias = "json_schema")]
#[serde(alias = "json_object")] #[serde(alias = "json_object")]
#[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))] #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
Json(serde_json::Value), Json(serde_json::Value),
#[serde(rename = "regex")] #[serde(rename = "regex")]
Regex(String), Regex(String),
/// A JSON Schema specification with additional metadata.
///
/// Includes an optional name for the schema, an optional strict flag, and the required schema definition.
#[serde(rename = "json_schema")]
#[schema(example = json ! ({"schema": {"properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}, "name": "person_info", "strict": true}))]
JsonSchema(JsonSchemaConfig),
} }
#[derive(Clone, Debug, Serialize, ToSchema)] #[derive(Clone, Debug, Serialize, ToSchema)]

View File

@ -380,6 +380,32 @@ impl Validation {
ValidGrammar::Regex(grammar_regex.to_string()) ValidGrammar::Regex(grammar_regex.to_string())
} }
GrammarType::JsonSchema(schema_config) => {
// Extract the actual schema for validation
let json = &schema_config.schema;
// Check if the json is a valid JSONSchema
jsonschema::draft202012::meta::validate(json)
.map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?;
// The schema can be valid but lack properties.
// We need properties for the grammar to be successfully parsed in Python.
// Therefore, we must check and throw an error if properties are missing.
json.get("properties")
.ok_or(ValidationError::InvalidGrammar(
"Grammar must have a 'properties' field".to_string(),
))?;
// TODO:
// Apply strictness if specified
let _strict = schema_config.strict.unwrap_or(false);
// Do compilation in the router for performance
let grammar_regex = json_schema_to_regex(json, None, json)
.map_err(ValidationError::RegexFromSchema)?;
ValidGrammar::Regex(grammar_regex.to_string())
}
GrammarType::Regex(regex) => ValidGrammar::Regex(regex), GrammarType::Regex(regex) => ValidGrammar::Regex(regex),
}; };
Some(valid_grammar) Some(valid_grammar)