From b4e7601fbe4e9a8a426afdaa8ce57577f9eb7470 Mon Sep 17 00:00:00 2001 From: drbh Date: Tue, 3 Sep 2024 17:32:11 +0000 Subject: [PATCH] fix: prefer do_sample false unless temp is set by user, and update chat tests --- .../test_flash_llama_simple.json | 4 +-- .../test_flash_llama_simple.json | 26 ------------------- .../test_flash_phi35_moe_simple.json | 26 +++++++++++++++++++ integration-tests/models/test_chat_llama.py | 2 +- .../models/test_flash_phi35_moe.py | 17 ++++++------ integration-tests/models/test_tools_llama.py | 6 +++-- 6 files changed, 41 insertions(+), 40 deletions(-) delete mode 100644 integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_llama_simple.json create mode 100644 integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_simple.json diff --git a/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json b/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json index 5553e17d..30e9727d 100644 --- a/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json +++ b/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json @@ -5,7 +5,7 @@ "index": 0, "logprobs": null, "message": { - "content": "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to appreciate nature.\n\nIn terms of temperature, the warmest times of the year are from June to August, when average high temperatures typically range from around 73°F or 23°C", + "content": "Brooklyn, New York, is located in the northeastern part of the state of New York, and its weather is characterized by a mix of humid and temperate climates. The average temperature in Brooklyn during the winter months is around 32°F (0°C) and in the summer months is around 82°F (28°C).\n\nThe city experiences four distinct seasons, with the spring and fall being the most pleasant and", "name": null, "role": "assistant", "tool_calls": null @@ -13,7 +13,7 @@ "usage": null } ], - "created": 1724792495, + "created": 1725383598, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "chat.completion", diff --git a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_llama_simple.json b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_llama_simple.json deleted file mode 100644 index 41e5f02d..00000000 --- a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_llama_simple.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "choices": [ - { - "finish_reason": "length", - "index": 0, - "logprobs": null, - "message": { - "content": "I'keeper services don't have real-time capabilities, however, I can guide you on how to find current weather conditions in Brooklyn, New York.\n\nTo get the most accurate", - "name": null, - "role": "assistant", - "tool_calls": null - }, - "usage": null - } - ], - "created": 1725304474, - "id": "", - "model": "microsoft/Phi-3.5-MoE-instruct", - "object": "chat.completion", - "system_fingerprint": "2.2.1-dev0-native", - "usage": { - "completion_tokens": 40, - "prompt_tokens": 31, - "total_tokens": 71 - } -} diff --git a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_simple.json b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_simple.json new file mode 100644 index 00000000..7ffdbff9 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_simple.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "message": { + "content": "I'm an AI unable to provide real-time data, but I can guide you on how to find current weather conditions in Brooklyn, New York. You can check websites like weather.com or accuweather.com, or use apps like The Weather Channel or AccuWeather on your smartphone. Alternatively, you can ask your voice assistant like Google Assistant or Siri for real-time updates.\n\nFor your information, I hope you'll have a", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1725383029, + "id": "", + "model": "microsoft/Phi-3.5-MoE-instruct", + "object": "chat.completion", + "system_fingerprint": "2.2.1-dev0-native", + "usage": { + "completion_tokens": 100, + "prompt_tokens": 31, + "total_tokens": 131 + } +} diff --git a/integration-tests/models/test_chat_llama.py b/integration-tests/models/test_chat_llama.py index 7d24add3..f7ed656c 100644 --- a/integration-tests/models/test_chat_llama.py +++ b/integration-tests/models/test_chat_llama.py @@ -35,6 +35,6 @@ async def test_flash_llama_simple(flash_llama_chat, response_snapshot): print(repr(response.choices[0].message.content)) assert ( response.choices[0].message.content - == "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to appreciate nature.\n\nIn terms of temperature, the warmest times of the year are from June to August, when average high temperatures typically range from around 73°F or 23°C" + == "Brooklyn, New York, is located in the northeastern part of the state of New York, and its weather is characterized by a mix of humid and temperate climates. The average temperature in Brooklyn during the winter months is around 32°F (0°C) and in the summer months is around 82°F (28°C).\n\nThe city experiences four distinct seasons, with the spring and fall being the most pleasant and" ) assert response == response_snapshot diff --git a/integration-tests/models/test_flash_phi35_moe.py b/integration-tests/models/test_flash_phi35_moe.py index 177c7f24..d55b1ec5 100644 --- a/integration-tests/models/test_flash_phi35_moe.py +++ b/integration-tests/models/test_flash_phi35_moe.py @@ -2,7 +2,7 @@ import pytest @pytest.fixture(scope="module") -def flash_llama_chat_handle(launcher): +def flash_phi35_moe_chat_handle(launcher): with launcher( "microsoft/Phi-3.5-MoE-instruct", num_shard=4, cuda_graphs=[1, 2] ) as handle: @@ -10,15 +10,15 @@ def flash_llama_chat_handle(launcher): @pytest.fixture(scope="module") -async def flash_llama_chat(flash_llama_chat_handle): - await flash_llama_chat_handle.health(300) - return flash_llama_chat_handle.client +async def flash_phi35_moe_chat(flash_phi35_moe_chat_handle): + await flash_phi35_moe_chat_handle.health(300) + return flash_phi35_moe_chat_handle.client @pytest.mark.private -async def test_flash_llama_simple(flash_llama_chat, response_snapshot): - response = await flash_llama_chat.chat( - max_tokens=40, +async def test_flash_phi35_moe_simple(flash_phi35_moe_chat, response_snapshot): + response = await flash_phi35_moe_chat.chat( + max_tokens=100, seed=1337, messages=[ { @@ -32,9 +32,8 @@ async def test_flash_llama_simple(flash_llama_chat, response_snapshot): ], ) - print(repr(response.choices[0].message.content)) assert ( response.choices[0].message.content - == "I'keeper services don't have real-time capabilities, however, I can guide you on how to find current weather conditions in Brooklyn, New York.\n\nTo get the most accurate" + == "I'm an AI unable to provide real-time data, but I can guide you on how to find current weather conditions in Brooklyn, New York. You can check websites like weather.com or accuweather.com, or use apps like The Weather Channel or AccuWeather on your smartphone. Alternatively, you can ask your voice assistant like Google Assistant or Siri for real-time updates.\n\nFor your information, I hope you'll have a" ) assert response == response_snapshot diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py index 9855cfda..c337afa1 100644 --- a/integration-tests/models/test_tools_llama.py +++ b/integration-tests/models/test_tools_llama.py @@ -4,7 +4,9 @@ import pytest @pytest.fixture(scope="module") def flash_llama_grammar_tools_handle(launcher): with launcher( - "TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False + "meta-llama/Meta-Llama-3.1-8B-Instruct", + num_shard=2, + disable_grammar_support=False, ) as handle: yield handle @@ -208,7 +210,7 @@ async def test_flash_llama_grammar_tools_stream( async for response in responses: count += 1 - assert count == 48 + assert count == 28 assert response == response_snapshot