mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 22:02:06 +00:00)

Merge b1a9dfff21 into 84ab88d843
Commit d048a9c5fe
@@ -29,26 +29,55 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapshot
         unit: str
         temperature: List[int]
 
+    json_payload = {
+        "model": "tgi",
+        "messages": [
+            {
+                "role": "system",
+                "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
+            },
+            {
+                "role": "user",
+                "content": "What's the weather like the next 3 days in San Francisco, CA?",
+            },
+        ],
+        "seed": 42,
+        "max_tokens": 500,
+        "response_format": {"type": "json_object", "value": Weather.schema()},
+    }
     # send the request
     response = requests.post(
         f"{llama_grammar.base_url}/v1/chat/completions",
         headers=llama_grammar.headers,
-        json={
-            "model": "tgi",
-            "messages": [
-                {
-                    "role": "system",
-                    "content": f"Respond to the users questions and answer them in the following format: {Weather.schema()}",
-                },
-                {
-                    "role": "user",
-                    "content": "What's the weather like the next 3 days in San Francisco, CA?",
-                },
-            ],
-            "seed": 42,
-            "max_tokens": 500,
-            "response_format": {"type": "json_object", "value": Weather.schema()},
-        },
+        json=json_payload,
+    )
+
+    chat_completion = response.json()
+    called = chat_completion["choices"][0]["message"]["content"]
+
+    assert response.status_code == 200
+    assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
+    assert chat_completion == response_snapshot
+
+    json_payload["response_format"]["type"] = "json"
+    response = requests.post(
+        f"{llama_grammar.base_url}/v1/chat/completions",
+        headers=llama_grammar.headers,
+        json=json_payload,
+    )
+
+    chat_completion = response.json()
+    called = chat_completion["choices"][0]["message"]["content"]
+
+    assert response.status_code == 200
+    assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
+    assert chat_completion == response_snapshot
+
+    json_payload["response_format"]["type"] = "json_schema"
+    response = requests.post(
+        f"{llama_grammar.base_url}/v1/chat/completions",
+        headers=llama_grammar.headers,
+        json=json_payload,
     )
 
     chat_completion = response.json()
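For reference, the Weather.schema() passed both in the system prompt and as the response_format value comes from a pydantic model whose fields (unit: str, temperature: List[int]) appear in the context lines above. A minimal standalone sketch of that model follows; the exact shape of the generated JSON Schema depends on the installed pydantic version.

from typing import List

from pydantic import BaseModel


# Field names are taken from the diff context above; the rest is illustrative.
class Weather(BaseModel):
    unit: str
    temperature: List[int]


if __name__ == "__main__":
    # Prints a JSON Schema dict with "properties" for "unit" (string) and
    # "temperature" (array of integers); key details vary across pydantic versions.
    print(Weather.schema())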
@@ -230,7 +230,8 @@ pub(crate) enum GrammarType {
     ///
     /// JSON Schema is a declarative language that allows to annotate JSON documents
     /// with types and descriptions.
-    #[serde(rename = "json")]
+    #[serde(rename = "json_schema")]
+    #[serde(alias = "json")]
     #[serde(alias = "json_object")]
     #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
     Json(serde_json::Value),
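The net effect of the serde change above: "json_schema" becomes the canonical wire name for this grammar variant, while "json" and "json_object" remain accepted as aliases on incoming requests, which is what the updated integration test exercises. Below is a rough client-side sketch of the same check, assuming a TGI server is already running; the base URL is a placeholder, and the schema is borrowed from the enum's doc example.

import requests

# Placeholder endpoint; point this at your own TGI deployment.
BASE_URL = "http://localhost:3000"

# Schema taken from the #[schema(example = ...)] attribute above.
schema = {"properties": {"location": {"type": "string"}}}

payload = {
    "model": "tgi",
    "messages": [
        {"role": "user", "content": "What's the weather like the next 3 days in San Francisco, CA?"}
    ],
    "max_tokens": 100,
    "response_format": {"type": "json_schema", "value": schema},
}

# All three type names should deserialize to the same GrammarType::Json variant.
for type_name in ("json_schema", "json", "json_object"):
    payload["response_format"]["type"] = type_name
    r = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload)
    print(type_name, r.status_code)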