Add the usual model tests

This commit is contained in:
Daniël de Kok 2024-09-25 10:27:29 +00:00
parent 245d6d8f7c
commit e3e483c901
5 changed files with 704 additions and 48 deletions

View File

@ -0,0 +1,109 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1724,
"logprob": null,
"text": "What"
},
{
"id": 338,
"logprob": -0.7133789,
"text": "is"
},
{
"id": 16030,
"logprob": -13.9296875,
"text": "gradient"
},
{
"id": 26815,
"logprob": -0.048919678,
"text": "descent"
},
{
"id": 29973,
"logprob": -3.0078125,
"text": "?"
},
{
"id": 13,
"logprob": -2.8105469,
"text": "\n"
},
{
"id": 13,
"logprob": -0.84521484,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 25584,
"logprob": -0.017028809,
"special": false,
"text": "Grad"
},
{
"id": 993,
"logprob": -0.0027313232,
"special": false,
"text": "ient"
},
{
"id": 26815,
"logprob": -0.023254395,
"special": false,
"text": " descent"
},
{
"id": 338,
"logprob": -2.0623207e-05,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.5361328,
"special": false,
"text": " a"
},
{
"id": 937,
"logprob": -0.17578125,
"special": false,
"text": " first"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 2098,
"logprob": -0.00011539459,
"special": false,
"text": "order"
},
{
"id": 13883,
"logprob": -0.47436523,
"special": false,
"text": " optimization"
},
{
"id": 5687,
"logprob": -0.00027680397,
"special": false,
"text": " algorithm"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is a first-order optimization algorithm"
}

View File

@ -0,0 +1,99 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 16030,
"logprob": null,
"text": "gradient"
},
{
"id": 26815,
"logprob": -6.4960938,
"text": "descent"
},
{
"id": 29973,
"logprob": -5.1484375,
"text": "?"
},
{
"id": 13,
"logprob": -4.0351562,
"text": "\n"
},
{
"id": 13,
"logprob": -5.2265625,
"text": "\n"
}
],
"seed": 0,
"tokens": [
{
"id": 10994,
"logprob": -1.1542969,
"special": false,
"text": "Hello"
},
{
"id": 29991,
"logprob": 0.0,
"special": false,
"text": "!"
},
{
"id": 739,
"logprob": 0.0,
"special": false,
"text": " It"
},
{
"id": 2444,
"logprob": -0.42260742,
"special": false,
"text": " seems"
},
{
"id": 366,
"logprob": 0.0,
"special": false,
"text": " you"
},
{
"id": 29915,
"logprob": 0.0,
"special": false,
"text": "'"
},
{
"id": 276,
"logprob": -0.9838867,
"special": false,
"text": "re"
},
{
"id": 3211,
"logprob": 0.0,
"special": false,
"text": " address"
},
{
"id": 292,
"logprob": 0.0,
"special": false,
"text": "ing"
},
{
"id": 263,
"logprob": -0.15124512,
"special": false,
"text": " a"
}
],
"top_tokens": null
},
"generated_text": "What is gradient descent?\n\nHello! It seems you're addressing a"
}

View File

@ -0,0 +1,438 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1724,
"logprob": null,
"text": "What"
},
{
"id": 338,
"logprob": -0.7133789,
"text": "is"
},
{
"id": 16030,
"logprob": -13.9296875,
"text": "gradient"
},
{
"id": 26815,
"logprob": -0.048919678,
"text": "descent"
},
{
"id": 29973,
"logprob": -3.0078125,
"text": "?"
},
{
"id": 13,
"logprob": -2.8105469,
"text": "\n"
},
{
"id": 13,
"logprob": -0.84521484,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 25584,
"logprob": -0.017028809,
"special": false,
"text": "Grad"
},
{
"id": 993,
"logprob": -0.0028476715,
"special": false,
"text": "ient"
},
{
"id": 26815,
"logprob": -0.023971558,
"special": false,
"text": " descent"
},
{
"id": 338,
"logprob": -2.0384789e-05,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.5229492,
"special": false,
"text": " a"
},
{
"id": 937,
"logprob": -0.17602539,
"special": false,
"text": " first"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 2098,
"logprob": -0.000116467476,
"special": false,
"text": "order"
},
{
"id": 13883,
"logprob": -0.47436523,
"special": false,
"text": " optimization"
},
{
"id": 5687,
"logprob": -0.00027871132,
"special": false,
"text": " algorithm"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is a first-order optimization algorithm"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1724,
"logprob": null,
"text": "What"
},
{
"id": 338,
"logprob": -0.7128906,
"text": "is"
},
{
"id": 16030,
"logprob": -13.9375,
"text": "gradient"
},
{
"id": 26815,
"logprob": -0.05053711,
"text": "descent"
},
{
"id": 29973,
"logprob": -3.0058594,
"text": "?"
},
{
"id": 13,
"logprob": -2.8242188,
"text": "\n"
},
{
"id": 13,
"logprob": -0.84521484,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 25584,
"logprob": -0.018859863,
"special": false,
"text": "Grad"
},
{
"id": 993,
"logprob": -0.002822876,
"special": false,
"text": "ient"
},
{
"id": 26815,
"logprob": -0.023254395,
"special": false,
"text": " descent"
},
{
"id": 338,
"logprob": -2.0384789e-05,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.5229492,
"special": false,
"text": " a"
},
{
"id": 937,
"logprob": -0.17126465,
"special": false,
"text": " first"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 2098,
"logprob": -0.0001155138,
"special": false,
"text": "order"
},
{
"id": 13883,
"logprob": -0.47436523,
"special": false,
"text": " optimization"
},
{
"id": 5687,
"logprob": -0.00027036667,
"special": false,
"text": " algorithm"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is a first-order optimization algorithm"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1724,
"logprob": null,
"text": "What"
},
{
"id": 338,
"logprob": -0.71484375,
"text": "is"
},
{
"id": 16030,
"logprob": -13.9375,
"text": "gradient"
},
{
"id": 26815,
"logprob": -0.049346924,
"text": "descent"
},
{
"id": 29973,
"logprob": -3.0078125,
"text": "?"
},
{
"id": 13,
"logprob": -2.8242188,
"text": "\n"
},
{
"id": 13,
"logprob": -0.86328125,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 25584,
"logprob": -0.017196655,
"special": false,
"text": "Grad"
},
{
"id": 993,
"logprob": -0.0028438568,
"special": false,
"text": "ient"
},
{
"id": 26815,
"logprob": -0.023254395,
"special": false,
"text": " descent"
},
{
"id": 338,
"logprob": -2.026558e-05,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.5229492,
"special": false,
"text": " a"
},
{
"id": 937,
"logprob": -0.17602539,
"special": false,
"text": " first"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 2098,
"logprob": -0.00011622906,
"special": false,
"text": "order"
},
{
"id": 13883,
"logprob": -0.48608398,
"special": false,
"text": " optimization"
},
{
"id": 5687,
"logprob": -0.00027894974,
"special": false,
"text": " algorithm"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is a first-order optimization algorithm"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1724,
"logprob": null,
"text": "What"
},
{
"id": 338,
"logprob": -0.7192383,
"text": "is"
},
{
"id": 16030,
"logprob": -13.9375,
"text": "gradient"
},
{
"id": 26815,
"logprob": -0.050445557,
"text": "descent"
},
{
"id": 29973,
"logprob": -3.0078125,
"text": "?"
},
{
"id": 13,
"logprob": -2.8242188,
"text": "\n"
},
{
"id": 13,
"logprob": -0.8276367,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 25584,
"logprob": -0.01727295,
"special": false,
"text": "Grad"
},
{
"id": 993,
"logprob": -0.0027542114,
"special": false,
"text": "ient"
},
{
"id": 26815,
"logprob": -0.023254395,
"special": false,
"text": " descent"
},
{
"id": 338,
"logprob": -2.0384789e-05,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.5229492,
"special": false,
"text": " a"
},
{
"id": 937,
"logprob": -0.17126465,
"special": false,
"text": " first"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 2098,
"logprob": -0.00011301041,
"special": false,
"text": "order"
},
{
"id": 13883,
"logprob": -0.48608398,
"special": false,
"text": " optimization"
},
{
"id": 5687,
"logprob": -0.00027894974,
"special": false,
"text": " algorithm"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is a first-order optimization algorithm"
}
]

View File

@ -1,26 +0,0 @@
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "I'm an AI unable to provide real-time data, but I can guide you on how to find current weather conditions in Brooklyn, New York. You can check websites like weather.com or accuweather.com, or use apps like The Weather Channel or AccuWeather on your smartphone. Alternatively, you can ask your voice assistant like Google Assistant or Siri for real-time updates.\n\nFor your information, I hope you'll have a",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1725383029,
"id": "",
"model": "microsoft/Phi-3.5-MoE-instruct",
"object": "chat.completion",
"system_fingerprint": "2.2.1-dev0-native",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 31,
"total_tokens": 131
}
}

View File

@ -2,38 +2,74 @@ import pytest
@pytest.fixture(scope="module")
def flash_phi35_moe_handle(launcher):
    # Launch a text-generation-inference server for Phi-3.5-MoE, sharded
    # across 4 GPUs; the handle is shared by every test in this module.
    with launcher(
        "microsoft/Phi-3.5-MoE-instruct",
        num_shard=4,
    ) as handle:
        yield handle
@pytest.fixture(scope="module")
async def flash_phi35_moe(flash_phi35_moe_handle):
    """Wait (up to 300s) for the launched server to become healthy, then
    return its client for use by the tests."""
    await flash_phi35_moe_handle.health(300)
    return flash_phi35_moe_handle.client
@pytest.mark.asyncio
async def test_flash_phi35_moe(flash_phi35_moe, response_snapshot):
    """Greedy (unseeded) generation must yield the expected deterministic
    completion and match the stored snapshot."""
    response = await flash_phi35_moe.generate(
        "What is gradient descent?\n\n", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "Gradient descent is a first-order optimization algorithm"
    )
    assert response == response_snapshot
@pytest.mark.asyncio
async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
    """Exercise the full set of generation/sampling parameters with a fixed
    seed so the sampled output is reproducible and snapshot-comparable."""
    result = await flash_phi35_moe.generate(
        "What is gradient descent?\n\n",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )

    expected = "What is gradient descent?\n\nHello! It seems you're addressing a"
    assert result.details.generated_tokens == 10
    assert result.generated_text == expected
    assert result == response_snapshot
@pytest.mark.asyncio
async def test_flash_phi35_moe_load(flash_phi35_moe, generate_load, response_snapshot):
    """Fire four identical concurrent requests; all must produce the same
    greedy completion and the batch must match the stored snapshot."""
    responses = await generate_load(
        flash_phi35_moe, "What is gradient descent?\n\n", max_new_tokens=10, n=4
    )

    generated = [r.generated_text for r in responses]
    assert len(responses) == 4
    assert responses[0].details.generated_tokens == 10
    assert generated[0] == "Gradient descent is a first-order optimization algorithm"
    assert all(text == generated[0] for text in generated), f"{generated}"
    assert responses == response_snapshot