mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
fix: prefer do_sample false unless temp is set by user, and update chat tests
This commit is contained in:
parent
d3565552af
commit
b4e7601fbe
@ -5,7 +5,7 @@
|
|||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null,
|
"logprobs": null,
|
||||||
"message": {
|
"message": {
|
||||||
"content": "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to appreciate nature.\n\nIn terms of temperature, the warmest times of the year are from June to August, when average high temperatures typically range from around 73°F or 23°C",
|
"content": "Brooklyn, New York, is located in the northeastern part of the state of New York, and its weather is characterized by a mix of humid and temperate climates. The average temperature in Brooklyn during the winter months is around 32°F (0°C) and in the summer months is around 82°F (28°C).\n\nThe city experiences four distinct seasons, with the spring and fall being the most pleasant and",
|
||||||
"name": null,
|
"name": null,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"tool_calls": null
|
"tool_calls": null
|
||||||
@ -13,7 +13,7 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1724792495,
|
"created": 1725383598,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
|
@ -1,26 +0,0 @@
|
|||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"finish_reason": "length",
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null,
|
|
||||||
"message": {
|
|
||||||
"content": "I'keeper services don't have real-time capabilities, however, I can guide you on how to find current weather conditions in Brooklyn, New York.\n\nTo get the most accurate",
|
|
||||||
"name": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": null
|
|
||||||
},
|
|
||||||
"usage": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1725304474,
|
|
||||||
"id": "",
|
|
||||||
"model": "microsoft/Phi-3.5-MoE-instruct",
|
|
||||||
"object": "chat.completion",
|
|
||||||
"system_fingerprint": "2.2.1-dev0-native",
|
|
||||||
"usage": {
|
|
||||||
"completion_tokens": 40,
|
|
||||||
"prompt_tokens": 31,
|
|
||||||
"total_tokens": 71
|
|
||||||
}
|
|
||||||
}
|
|
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "length",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"message": {
|
||||||
|
"content": "I'm an AI unable to provide real-time data, but I can guide you on how to find current weather conditions in Brooklyn, New York. You can check websites like weather.com or accuweather.com, or use apps like The Weather Channel or AccuWeather on your smartphone. Alternatively, you can ask your voice assistant like Google Assistant or Siri for real-time updates.\n\nFor your information, I hope you'll have a",
|
||||||
|
"name": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1725383029,
|
||||||
|
"id": "",
|
||||||
|
"model": "microsoft/Phi-3.5-MoE-instruct",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"system_fingerprint": "2.2.1-dev0-native",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 100,
|
||||||
|
"prompt_tokens": 31,
|
||||||
|
"total_tokens": 131
|
||||||
|
}
|
||||||
|
}
|
@ -35,6 +35,6 @@ async def test_flash_llama_simple(flash_llama_chat, response_snapshot):
|
|||||||
print(repr(response.choices[0].message.content))
|
print(repr(response.choices[0].message.content))
|
||||||
assert (
|
assert (
|
||||||
response.choices[0].message.content
|
response.choices[0].message.content
|
||||||
== "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to appreciate nature.\n\nIn terms of temperature, the warmest times of the year are from June to August, when average high temperatures typically range from around 73°F or 23°C"
|
== "Brooklyn, New York, is located in the northeastern part of the state of New York, and its weather is characterized by a mix of humid and temperate climates. The average temperature in Brooklyn during the winter months is around 32°F (0°C) and in the summer months is around 82°F (28°C).\n\nThe city experiences four distinct seasons, with the spring and fall being the most pleasant and"
|
||||||
)
|
)
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
@ -2,7 +2,7 @@ import pytest
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def flash_llama_chat_handle(launcher):
|
def flash_phi35_moe_chat_handle(launcher):
|
||||||
with launcher(
|
with launcher(
|
||||||
"microsoft/Phi-3.5-MoE-instruct", num_shard=4, cuda_graphs=[1, 2]
|
"microsoft/Phi-3.5-MoE-instruct", num_shard=4, cuda_graphs=[1, 2]
|
||||||
) as handle:
|
) as handle:
|
||||||
@ -10,15 +10,15 @@ def flash_llama_chat_handle(launcher):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
async def flash_llama_chat(flash_llama_chat_handle):
|
async def flash_phi35_moe_chat(flash_phi35_moe_chat_handle):
|
||||||
await flash_llama_chat_handle.health(300)
|
await flash_phi35_moe_chat_handle.health(300)
|
||||||
return flash_llama_chat_handle.client
|
return flash_phi35_moe_chat_handle.client
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_simple(flash_llama_chat, response_snapshot):
|
async def test_flash_phi35_moe_simple(flash_phi35_moe_chat, response_snapshot):
|
||||||
response = await flash_llama_chat.chat(
|
response = await flash_phi35_moe_chat.chat(
|
||||||
max_tokens=40,
|
max_tokens=100,
|
||||||
seed=1337,
|
seed=1337,
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
@ -32,9 +32,8 @@ async def test_flash_llama_simple(flash_llama_chat, response_snapshot):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
print(repr(response.choices[0].message.content))
|
|
||||||
assert (
|
assert (
|
||||||
response.choices[0].message.content
|
response.choices[0].message.content
|
||||||
== "I'keeper services don't have real-time capabilities, however, I can guide you on how to find current weather conditions in Brooklyn, New York.\n\nTo get the most accurate"
|
== "I'm an AI unable to provide real-time data, but I can guide you on how to find current weather conditions in Brooklyn, New York. You can check websites like weather.com or accuweather.com, or use apps like The Weather Channel or AccuWeather on your smartphone. Alternatively, you can ask your voice assistant like Google Assistant or Siri for real-time updates.\n\nFor your information, I hope you'll have a"
|
||||||
)
|
)
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
@ -4,7 +4,9 @@ import pytest
|
|||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def flash_llama_grammar_tools_handle(launcher):
|
def flash_llama_grammar_tools_handle(launcher):
|
||||||
with launcher(
|
with launcher(
|
||||||
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False
|
"meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
num_shard=2,
|
||||||
|
disable_grammar_support=False,
|
||||||
) as handle:
|
) as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
@ -208,7 +210,7 @@ async def test_flash_llama_grammar_tools_stream(
|
|||||||
async for response in responses:
|
async for response in responses:
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
assert count == 48
|
assert count == 28
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user