Add integration test for AWQ MoE

2025-09-12 04:44:52 +00:00 · 2024-10-08 08:00:23 +00:00 · 2024-10-08 08:00:23 +00:00 · a1c5f38c87
commit a1c5f38c87
parent df962ca864
4 changed files with 694 additions and 0 deletions
--- a/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq.json
@ -0,0 +1,104 @@
 {
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      {
        "id": 1,
        "logprob": null,
        "text": "<s>"
      },
      {
        "id": 1824,
        "logprob": -12.296875,
        "text": "What"
      },
      {
        "id": 349,
        "logprob": -0.97216797,
        "text": "is"
      },
      {
        "id": 3534,
        "logprob": -10.1796875,
        "text": "deep"
      },
      {
        "id": 5168,
        "logprob": -0.9658203,
        "text": "learning"
      },
      {
        "id": 28804,
        "logprob": -0.44384766,
        "text": "?"
      }
    ],
    "seed": null,
    "tokens": [
      {
        "id": 13,
        "logprob": -0.50878906,
        "special": false,
        "text": "\n"
      },
      {
        "id": 13,
        "logprob": -0.8876953,
        "special": false,
        "text": "\n"
      },
      {
        "id": 23229,
        "logprob": -0.15124512,
        "special": false,
        "text": "Deep"
      },
      {
        "id": 5168,
        "logprob": -0.030288696,
        "special": false,
        "text": " learning"
      },
      {
        "id": 349,
        "logprob": -0.16687012,
        "special": false,
        "text": " is"
      },
      {
        "id": 264,
        "logprob": -0.17858887,
        "special": false,
        "text": " a"
      },
      {
        "id": 19804,
        "logprob": -0.8046875,
        "special": false,
        "text": " subset"
      },
      {
        "id": 302,
        "logprob": -0.007205963,
        "special": false,
        "text": " of"
      },
      {
        "id": 5599,
        "logprob": -0.090026855,
        "special": false,
        "text": " machine"
      },
      {
        "id": 5168,
        "logprob": -0.0030670166,
        "special": false,
        "text": " learning"
      }
    ],
    "top_tokens": null
  },
  "generated_text": "\n\nDeep learning is a subset of machine learning"
 }
--- a/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json
@ -0,0 +1,99 @@
 {
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      {
        "id": 1,
        "logprob": null,
        "text": "<s>"
      },
      {
        "id": 349,
        "logprob": -13.921875,
        "text": "is"
      },
      {
        "id": 3534,
        "logprob": -11.2265625,
        "text": "deep"
      },
      {
        "id": 5168,
        "logprob": -2.3886719,
        "text": "learning"
      },
      {
        "id": 28804,
        "logprob": -4.7109375,
        "text": "?"
      }
    ],
    "seed": 0,
    "tokens": [
      {
        "id": 13,
        "logprob": 0.0,
        "special": false,
        "text": "\n"
      },
      {
        "id": 23229,
        "logprob": -0.5229492,
        "special": false,
        "text": "Deep"
      },
      {
        "id": 17504,
        "logprob": 0.0,
        "special": false,
        "text": " Learning"
      },
      {
        "id": 349,
        "logprob": -0.5151367,
        "special": false,
        "text": " is"
      },
      {
        "id": 264,
        "logprob": 0.0,
        "special": false,
        "text": " a"
      },
      {
        "id": 19804,
        "logprob": 0.0,
        "special": false,
        "text": " subset"
      },
      {
        "id": 302,
        "logprob": 0.0,
        "special": false,
        "text": " of"
      },
      {
        "id": 13253,
        "logprob": -1.3359375,
        "special": false,
        "text": " Machine"
      },
      {
        "id": 17504,
        "logprob": 0.0,
        "special": false,
        "text": " Learning"
      },
      {
        "id": 28725,
        "logprob": 0.0,
        "special": false,
        "text": ","
      }
    ],
    "top_tokens": null
  },
  "generated_text": "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
 }
--- a/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json
@ -0,0 +1,418 @@
 [
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1824,
          "logprob": -12.296875,
          "text": "What"
        },
        {
          "id": 349,
          "logprob": -0.97216797,
          "text": "is"
        },
        {
          "id": 3534,
          "logprob": -10.1796875,
          "text": "deep"
        },
        {
          "id": 5168,
          "logprob": -0.9658203,
          "text": "learning"
        },
        {
          "id": 28804,
          "logprob": -0.44384766,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.50878906,
          "special": false,
          "text": "\n"
        },
        {
          "id": 13,
          "logprob": -0.8876953,
          "special": false,
          "text": "\n"
        },
        {
          "id": 23229,
          "logprob": -0.15136719,
          "special": false,
          "text": "Deep"
        },
        {
          "id": 5168,
          "logprob": -0.030273438,
          "special": false,
          "text": " learning"
        },
        {
          "id": 349,
          "logprob": -0.1665039,
          "special": false,
          "text": " is"
        },
        {
          "id": 264,
          "logprob": -0.1776123,
          "special": false,
          "text": " a"
        },
        {
          "id": 19804,
          "logprob": -0.8076172,
          "special": false,
          "text": " subset"
        },
        {
          "id": 302,
          "logprob": -0.007183075,
          "special": false,
          "text": " of"
        },
        {
          "id": 5599,
          "logprob": -0.090148926,
          "special": false,
          "text": " machine"
        },
        {
          "id": 5168,
          "logprob": -0.0030670166,
          "special": false,
          "text": " learning"
        }
      ],
      "top_tokens": null
    },
    "generated_text": "\n\nDeep learning is a subset of machine learning"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1824,
          "logprob": -12.34375,
          "text": "What"
        },
        {
          "id": 349,
          "logprob": -0.96728516,
          "text": "is"
        },
        {
          "id": 3534,
          "logprob": -10.1796875,
          "text": "deep"
        },
        {
          "id": 5168,
          "logprob": -0.97265625,
          "text": "learning"
        },
        {
          "id": 28804,
          "logprob": -0.44189453,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.51220703,
          "special": false,
          "text": "\n"
        },
        {
          "id": 13,
          "logprob": -0.87402344,
          "special": false,
          "text": "\n"
        },
        {
          "id": 23229,
          "logprob": -0.15039062,
          "special": false,
          "text": "Deep"
        },
        {
          "id": 5168,
          "logprob": -0.030288696,
          "special": false,
          "text": " learning"
        },
        {
          "id": 349,
          "logprob": -0.1652832,
          "special": false,
          "text": " is"
        },
        {
          "id": 264,
          "logprob": -0.17858887,
          "special": false,
          "text": " a"
        },
        {
          "id": 19804,
          "logprob": -0.81103516,
          "special": false,
          "text": " subset"
        },
        {
          "id": 302,
          "logprob": -0.007183075,
          "special": false,
          "text": " of"
        },
        {
          "id": 5599,
          "logprob": -0.08880615,
          "special": false,
          "text": " machine"
        },
        {
          "id": 5168,
          "logprob": -0.0030612946,
          "special": false,
          "text": " learning"
        }
      ],
      "top_tokens": null
    },
    "generated_text": "\n\nDeep learning is a subset of machine learning"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1824,
          "logprob": -12.34375,
          "text": "What"
        },
        {
          "id": 349,
          "logprob": -0.96728516,
          "text": "is"
        },
        {
          "id": 3534,
          "logprob": -10.1796875,
          "text": "deep"
        },
        {
          "id": 5168,
          "logprob": -0.97265625,
          "text": "learning"
        },
        {
          "id": 28804,
          "logprob": -0.44189453,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.51220703,
          "special": false,
          "text": "\n"
        },
        {
          "id": 13,
          "logprob": -0.87402344,
          "special": false,
          "text": "\n"
        },
        {
          "id": 23229,
          "logprob": -0.15039062,
          "special": false,
          "text": "Deep"
        },
        {
          "id": 5168,
          "logprob": -0.030288696,
          "special": false,
          "text": " learning"
        },
        {
          "id": 349,
          "logprob": -0.1652832,
          "special": false,
          "text": " is"
        },
        {
          "id": 264,
          "logprob": -0.17858887,
          "special": false,
          "text": " a"
        },
        {
          "id": 19804,
          "logprob": -0.81103516,
          "special": false,
          "text": " subset"
        },
        {
          "id": 302,
          "logprob": -0.007183075,
          "special": false,
          "text": " of"
        },
        {
          "id": 5599,
          "logprob": -0.08880615,
          "special": false,
          "text": " machine"
        },
        {
          "id": 5168,
          "logprob": -0.0030612946,
          "special": false,
          "text": " learning"
        }
      ],
      "top_tokens": null
    },
    "generated_text": "\n\nDeep learning is a subset of machine learning"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1824,
          "logprob": -12.34375,
          "text": "What"
        },
        {
          "id": 349,
          "logprob": -0.96728516,
          "text": "is"
        },
        {
          "id": 3534,
          "logprob": -10.1796875,
          "text": "deep"
        },
        {
          "id": 5168,
          "logprob": -0.97265625,
          "text": "learning"
        },
        {
          "id": 28804,
          "logprob": -0.44189453,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.51220703,
          "special": false,
          "text": "\n"
        },
        {
          "id": 13,
          "logprob": -0.87402344,
          "special": false,
          "text": "\n"
        },
        {
          "id": 23229,
          "logprob": -0.15039062,
          "special": false,
          "text": "Deep"
        },
        {
          "id": 5168,
          "logprob": -0.030288696,
          "special": false,
          "text": " learning"
        },
        {
          "id": 349,
          "logprob": -0.1652832,
          "special": false,
          "text": " is"
        },
        {
          "id": 264,
          "logprob": -0.17858887,
          "special": false,
          "text": " a"
        },
        {
          "id": 19804,
          "logprob": -0.81103516,
          "special": false,
          "text": " subset"
        },
        {
          "id": 302,
          "logprob": -0.007183075,
          "special": false,
          "text": " of"
        },
        {
          "id": 5599,
          "logprob": -0.08880615,
          "special": false,
          "text": " machine"
        },
        {
          "id": 5168,
          "logprob": -0.0030612946,
          "special": false,
          "text": " learning"
        }
      ],
      "top_tokens": null
    },
    "generated_text": "\n\nDeep learning is a subset of machine learning"
  }
 ]
--- a/integration-tests/models/test_flash_mixtral_awq.py
+++ b/integration-tests/models/test_flash_mixtral_awq.py
@ -0,0 +1,73 @@
 import pytest
@pytest.fixture(scope="module")
def flash_mixtral_awq_handle(launcher):
    """Yield a launcher handle for the AWQ-quantized Mixtral checkpoint.

    The ``launcher`` fixture is called with the model id and ``num_shard=2``
    and used as a context manager. The handle is yielded from inside the
    ``with`` block, so the context manager's exit only runs once pytest
    tears this module-scoped fixture down.
    """
    model_id = "casperhansen/mixtral-instruct-awq"
    with launcher(model_id, num_shard=2) as server_handle:
        yield server_handle
@pytest.fixture(scope="module")
async def flash_mixtral_awq(flash_mixtral_awq_handle):
    """Return the handle's client once the health check has completed.

    NOTE(review): the ``300`` passed to ``health`` is presumably a timeout in
    seconds -- confirm against the launcher handle implementation.
    """
    handle = flash_mixtral_awq_handle
    # Do not hand out the client before the health check has been awaited.
    await handle.health(300)
    return handle.client
@pytest.mark.asyncio
async def test_flash_mixtral_awq(flash_mixtral_awq, response_snapshot):
    """Generate 10 new tokens for a fixed prompt and compare to the snapshot.

    No sampling parameters are passed; only ``max_new_tokens`` and
    ``decoder_input_details`` are set. The test checks the generated token
    count, the exact generated text, and finally the whole response against
    ``response_snapshot``.
    """
    prompt = "What is deep learning?"
    expected_text = "\n\nDeep learning is a subset of machine learning"

    response = await flash_mixtral_awq.generate(
        prompt, max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert response.generated_text == expected_text
    assert response == response_snapshot
@pytest.mark.asyncio
async def test_flash_mixtral_awq_all_params(flash_mixtral_awq, response_snapshot):
    """Generate with every optional parameter set and compare to the snapshot.

    The request uses a fixed ``seed`` together with the sampling, penalty,
    truncation, stop-sequence and watermark options listed below. Because
    ``return_full_text=True`` is requested, the expected text begins with the
    prompt itself.
    """
    generation_kwargs = {
        "max_new_tokens": 10,
        "repetition_penalty": 1.2,
        "return_full_text": True,
        "stop_sequences": ["test"],
        "temperature": 0.5,
        "top_p": 0.9,
        "top_k": 10,
        "truncate": 5,
        "typical_p": 0.9,
        "watermark": True,
        "decoder_input_details": True,
        "seed": 0,
    }
    expected_text = (
        "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
    )

    response = await flash_mixtral_awq.generate(
        "What is deep learning?", **generation_kwargs
    )

    assert response.details.generated_tokens == 10
    assert response.generated_text == expected_text
    assert response == response_snapshot
@pytest.mark.asyncio
async def test_flash_mixtral_awq_load(
    flash_mixtral_awq, generate_load, response_snapshot
):
    """Request the same prompt four times and require identical outputs.

    ``generate_load`` is called with ``n=4`` (presumably the number of
    requests it issues -- confirm against the fixture). The test checks that
    four responses come back, that the first has the expected length and
    text, that all responses share the same generated text, and that the
    full list matches ``response_snapshot``.
    """
    prompt = "What is deep learning?"
    responses = await generate_load(flash_mixtral_awq, prompt, max_new_tokens=10, n=4)

    assert len(responses) == 4

    first = responses[0]
    assert first.details.generated_tokens == 10
    assert first.generated_text == "\n\nDeep learning is a subset of machine learning"

    # On failure, the message lists every generated text for comparison.
    assert all(
        [r.generated_text == first.generated_text for r in responses]
    ), f"{[r.generated_text  for r in responses]}"

    assert responses == response_snapshot