mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 22:02:06 +00:00)

Merge b1a9dfff21 into 84ab88d843
Commit d048a9c5fe
@@ -29,26 +29,55 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapshot
         unit: str
         temperature: List[int]
 
+    json_payload = {
+        "model": "tgi",
+        "messages": [
+            {
+                "role": "system",
+                "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
+            },
+            {
+                "role": "user",
+                "content": "What's the weather like the next 3 days in San Francisco, CA?",
+            },
+        ],
+        "seed": 42,
+        "max_tokens": 500,
+        "response_format": {"type": "json_object", "value": Weather.schema()},
+    }
     # send the request
     response = requests.post(
         f"{llama_grammar.base_url}/v1/chat/completions",
         headers=llama_grammar.headers,
-        json={
-            "model": "tgi",
-            "messages": [
-                {
-                    "role": "system",
-                    "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
-                },
-                {
-                    "role": "user",
-                    "content": "What's the weather like the next 3 days in San Francisco, CA?",
-                },
-            ],
-            "seed": 42,
-            "max_tokens": 500,
-            "response_format": {"type": "json_object", "value": Weather.schema()},
-        },
+        json=json_payload,
+    )
+
+    chat_completion = response.json()
+    called = chat_completion["choices"][0]["message"]["content"]
+
+    assert response.status_code == 200
+    assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
+    assert chat_completion == response_snapshot
+
+    json_payload["response_format"]["type"] = "json"
+    response = requests.post(
+        f"{llama_grammar.base_url}/v1/chat/completions",
+        headers=llama_grammar.headers,
+        json=json_payload,
+    )
+
+    chat_completion = response.json()
+    called = chat_completion["choices"][0]["message"]["content"]
+
+    assert response.status_code == 200
+    assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
+    assert chat_completion == response_snapshot
+
+    json_payload["response_format"]["type"] = "json_schema"
+    response = requests.post(
+        f"{llama_grammar.base_url}/v1/chat/completions",
+        headers=llama_grammar.headers,
+        json=json_payload,
     )
 
     chat_completion = response.json()
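For reference, the Weather.schema() passed both in the system prompt and as the response_format value comes from a pydantic model whose fields (unit: str, temperature: List[int]) appear in the context lines above. A minimal standalone sketch of that model follows; the exact shape of the generated JSON Schema depends on the installed pydantic version.

from typing import List

from pydantic import BaseModel


# Field names are taken from the diff context above; the rest is illustrative.
class Weather(BaseModel):
    unit: str
    temperature: List[int]


if __name__ == "__main__":
    # Prints a JSON Schema dict with "properties" for "unit" (string) and
    # "temperature" (array of integers); key details vary across pydantic versions.
    print(Weather.schema())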
@@ -230,7 +230,8 @@ pub(crate) enum GrammarType {
     ///
     /// JSON Schema is a declarative language that allows to annotate JSON documents
     /// with types and descriptions.
-    #[serde(rename = "json")]
+    #[serde(rename = "json_schema")]
+    #[serde(alias = "json")]
     #[serde(alias = "json_object")]
     #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
     Json(serde_json::Value),
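The net effect of the serde change above: "json_schema" becomes the canonical wire name for this grammar variant, while "json" and "json_object" remain accepted as aliases on incoming requests, which is what the updated integration test exercises. Below is a rough client-side sketch of the same check, assuming a TGI server is already running; the base URL is a placeholder, and the schema is borrowed from the enum's doc example.

import requests

# Placeholder endpoint; point this at your own TGI deployment.
BASE_URL = "http://localhost:3000"

# Schema taken from the #[schema(example = ...)] attribute above.
schema = {"properties": {"location": {"type": "string"}}}

payload = {
    "model": "tgi",
    "messages": [
        {"role": "user", "content": "What's the weather like the next 3 days in San Francisco, CA?"}
    ],
    "max_tokens": 100,
    "response_format": {"type": "json_schema", "value": schema},
}

# All three type names should deserialize to the same GrammarType::Json variant.
for type_name in ("json_schema", "json", "json_object"):
    payload["response_format"]["type"] = type_name
    r = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload)
    print(type_name, r.status_code)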