From a29893486e3af0b0041b380910e80bc96180947c Mon Sep 17 00:00:00 2001
From: Jason Cheng <jasoncky96@gmail.com>
Date: Sat, 24 Feb 2024 15:42:56 +0800
Subject: [PATCH] Added test cases

---
 .../test_flash_qwen2/test_flash_qwen2.json    |  84 +++++
 .../test_flash_qwen2_all_params.json          |  84 +++++
 .../test_flash_qwen2_load.json                | 338 ++++++++++++++++++
 integration-tests/models/test_flash_qwen2.py  |  61 ++++
 4 files changed, 567 insertions(+)
 create mode 100644 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json
 create mode 100644 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json
 create mode 100644 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json
 create mode 100644 integration-tests/models/test_flash_qwen2.py

diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json
new file mode 100644
index 00000000..2e3906ce
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json
@@ -0,0 +1,84 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+          "id": 2271,
+          "text": "Test",
+          "logprob": null
+      },
+      {
+          "id": 1681,
+          "text": " request",
+          "logprob": -7.0351562
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+          "id": 369,
+          "text": " for",
+          "logprob": -2.1914062,
+          "special": false
+      },
+      {
+          "id": 279,
+          "text": " the",
+          "logprob": -2.6210938,
+          "special": false
+      },
+      {
+          "id": 2701,
+          "text": " following",
+          "logprob": -3.6445312,
+          "special": false
+      },
+      {
+          "id": 729,
+          "text": " function",
+          "logprob": -2.9648438,
+          "special": false
+      },
+      {
+          "id": 271,
+          "text": "\n\n",
+          "logprob": -1.9111328,
+          "special": false
+      },
+      {
+          "id": 31946,
+          "text": "Inputs",
+          "logprob": -1.6855469,
+          "special": false
+      },
+      {
+          "id": 25,
+          "text": ":",
+          "logprob": -1.6093254e-05,
+          "special": false
+      },
+      {
+          "id": 707,
+          "text": " def",
+          "logprob": -0.5678711,
+          "special": false
+      },
+      {
+          "id": 1477,
+          "text": " find",
+          "logprob": -2.5917969,
+          "special": false
+      },
+      {
+          "id": 6345,
+          "text": "_max",
+          "logprob": -1.8349609,
+          "special": false
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": " for the following function\n\nInputs: def find_max"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json
new file mode 100644
index 00000000..bdaab6f2
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json
@@ -0,0 +1,84 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+          "id": 2271,
+          "text": "Test",
+          "logprob": null
+      },
+      {
+          "id": 1681,
+          "text": " request",
+          "logprob": -7.0351562
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+          "id": 311,
+          "text": " to",
+          "logprob": -1.4472656,
+          "special": false
+      },
+      {
+          "id": 633,
+          "text": " get",
+          "logprob": -0.4741211,
+          "special": false
+      },
+      {
+          "id": 264,
+          "text": " a",
+          "logprob": 0.0,
+          "special": false
+      },
+      {
+          "id": 1140,
+          "text": " list",
+          "logprob": 0.0,
+          "special": false
+      },
+      {
+          "id": 315,
+          "text": " of",
+          "logprob": 0.0,
+          "special": false
+      },
+      {
+          "id": 678,
+          "text": " all",
+          "logprob": 0.0,
+          "special": false
+      },
+      {
+          "id": 279,
+          "text": " the",
+          "logprob": -0.2590332,
+          "special": false
+      },
+      {
+          "id": 3847,
+          "text": " users",
+          "logprob": -0.45239258,
+          "special": false
+      },
+      {
+          "id": 304,
+          "text": " in",
+          "logprob": -0.12322998,
+          "special": false
+      },
+      {
+          "id": 419,
+          "text": " this",
+          "logprob": -1.7275391,
+          "special": false
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "Test request to get a list of all the users in this"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json
new file mode 100644
index 00000000..6f0b21e9
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json
@@ -0,0 +1,338 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+            "id": 2271,
+            "text": "Test",
+            "logprob": null
+        },
+        {
+            "id": 1681,
+            "text": " request",
+            "logprob": -7.0351562
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+            "id": 369,
+            "text": " for",
+            "logprob": -2.1914062,
+            "special": false
+        },
+        {
+            "id": 279,
+            "text": " the",
+            "logprob": -2.6210938,
+            "special": false
+        },
+        {
+            "id": 2701,
+            "text": " following",
+            "logprob": -3.6445312,
+            "special": false
+        },
+        {
+            "id": 729,
+            "text": " function",
+            "logprob": -2.9648438,
+            "special": false
+        },
+        {
+            "id": 271,
+            "text": "\n\n",
+            "logprob": -1.9111328,
+            "special": false
+        },
+        {
+            "id": 31946,
+            "text": "Inputs",
+            "logprob": -1.6855469,
+            "special": false
+        },
+        {
+            "id": 25,
+            "text": ":",
+            "logprob": -1.6093254e-05,
+            "special": false
+        },
+        {
+            "id": 707,
+            "text": " def",
+            "logprob": -0.5678711,
+            "special": false
+        },
+        {
+            "id": 1477,
+            "text": " find",
+            "logprob": -2.5917969,
+            "special": false
+        },
+        {
+            "id": 6345,
+            "text": "_max",
+            "logprob": -1.8349609,
+            "special": false
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " for the following function\n\nInputs: def find_max"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+            "id": 2271,
+            "text": "Test",
+            "logprob": null
+        },
+        {
+            "id": 1681,
+            "text": " request",
+            "logprob": -7.0351562
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+            "id": 369,
+            "text": " for",
+            "logprob": -2.1914062,
+            "special": false
+        },
+        {
+            "id": 279,
+            "text": " the",
+            "logprob": -2.6210938,
+            "special": false
+        },
+        {
+            "id": 2701,
+            "text": " following",
+            "logprob": -3.6445312,
+            "special": false
+        },
+        {
+            "id": 729,
+            "text": " function",
+            "logprob": -2.9648438,
+            "special": false
+        },
+        {
+            "id": 271,
+            "text": "\n\n",
+            "logprob": -1.9111328,
+            "special": false
+        },
+        {
+            "id": 31946,
+            "text": "Inputs",
+            "logprob": -1.6855469,
+            "special": false
+        },
+        {
+            "id": 25,
+            "text": ":",
+            "logprob": -1.6093254e-05,
+            "special": false
+        },
+        {
+            "id": 707,
+            "text": " def",
+            "logprob": -0.5678711,
+            "special": false
+        },
+        {
+            "id": 1477,
+            "text": " find",
+            "logprob": -2.5917969,
+            "special": false
+        },
+        {
+            "id": 6345,
+            "text": "_max",
+            "logprob": -1.8349609,
+            "special": false
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " for the following function\n\nInputs: def find_max"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+            "id": 2271,
+            "text": "Test",
+            "logprob": null
+        },
+        {
+            "id": 1681,
+            "text": " request",
+            "logprob": -7.0351562
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+            "id": 369,
+            "text": " for",
+            "logprob": -2.1914062,
+            "special": false
+        },
+        {
+            "id": 279,
+            "text": " the",
+            "logprob": -2.6210938,
+            "special": false
+        },
+        {
+            "id": 2701,
+            "text": " following",
+            "logprob": -3.6445312,
+            "special": false
+        },
+        {
+            "id": 729,
+            "text": " function",
+            "logprob": -2.9648438,
+            "special": false
+        },
+        {
+            "id": 271,
+            "text": "\n\n",
+            "logprob": -1.9111328,
+            "special": false
+        },
+        {
+            "id": 31946,
+            "text": "Inputs",
+            "logprob": -1.6855469,
+            "special": false
+        },
+        {
+            "id": 25,
+            "text": ":",
+            "logprob": -1.6093254e-05,
+            "special": false
+        },
+        {
+            "id": 707,
+            "text": " def",
+            "logprob": -0.5678711,
+            "special": false
+        },
+        {
+            "id": 1477,
+            "text": " find",
+            "logprob": -2.5917969,
+            "special": false
+        },
+        {
+            "id": 6345,
+            "text": "_max",
+            "logprob": -1.8349609,
+            "special": false
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " for the following function\n\nInputs: def find_max"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+            "id": 2271,
+            "text": "Test",
+            "logprob": null
+        },
+        {
+            "id": 1681,
+            "text": " request",
+            "logprob": -7.0351562
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+            "id": 369,
+            "text": " for",
+            "logprob": -2.1914062,
+            "special": false
+        },
+        {
+            "id": 279,
+            "text": " the",
+            "logprob": -2.6210938,
+            "special": false
+        },
+        {
+            "id": 2701,
+            "text": " following",
+            "logprob": -3.6445312,
+            "special": false
+        },
+        {
+            "id": 729,
+            "text": " function",
+            "logprob": -2.9648438,
+            "special": false
+        },
+        {
+            "id": 271,
+            "text": "\n\n",
+            "logprob": -1.9111328,
+            "special": false
+        },
+        {
+            "id": 31946,
+            "text": "Inputs",
+            "logprob": -1.6855469,
+            "special": false
+        },
+        {
+            "id": 25,
+            "text": ":",
+            "logprob": -1.6093254e-05,
+            "special": false
+        },
+        {
+            "id": 707,
+            "text": " def",
+            "logprob": -0.5678711,
+            "special": false
+        },
+        {
+            "id": 1477,
+            "text": " find",
+            "logprob": -2.5917969,
+            "special": false
+        },
+        {
+            "id": 6345,
+            "text": "_max",
+            "logprob": -1.8349609,
+            "special": false
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " for the following function\n\nInputs: def find_max"
+  }
+]
diff --git a/integration-tests/models/test_flash_qwen2.py b/integration-tests/models/test_flash_qwen2.py
new file mode 100644
index 00000000..e07ed553
--- /dev/null
+++ b/integration-tests/models/test_flash_qwen2.py
@@ -0,0 +1,61 @@
+import pytest
+
+
+@pytest.fixture(scope="module")  # module scope: created once, shared by the tests in this file
+def flash_qwen2_handle(launcher):
+    with launcher("Qwen/Qwen1.5-7B") as handle:  # presumably starts a server for this model id; confirm in conftest
+        yield handle  # dependants run while the launcher context is still open
+
+
+@pytest.fixture(scope="module")  # module scope, same as the handle fixture it depends on
+async def flash_qwen2(flash_qwen2_handle):
+    await flash_qwen2_handle.health(300)  # presumably waits for readiness; 300 looks like a timeout, confirm unit
+    return flash_qwen2_handle.client  # the tests below call .generate() on this client
+
+
+@pytest.mark.asyncio
+async def test_flash_qwen2(flash_qwen2, response_snapshot):
+    """Generate 10 tokens for "Test request" and compare with the stored snapshot."""
+    options = {"max_new_tokens": 10, "decoder_input_details": True}
+    result = await flash_qwen2.generate("Test request", **options)
+
+    assert result.details.generated_tokens == 10
+    assert result.generated_text == " for the following function\n\nInputs: def find_max"
+    assert result == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_qwen2_all_params(flash_qwen2, response_snapshot):
+    """Generate with every option below set explicitly; compare to the snapshot."""
+    params = {
+        "max_new_tokens": 10,
+        "repetition_penalty": 1.2,
+        "return_full_text": True,
+        "stop_sequences": ["test"],
+        "temperature": 0.5,
+        "top_p": 0.9,
+        "top_k": 10,
+        "truncate": 5,
+        "typical_p": 0.9,
+        "watermark": True,
+        "decoder_input_details": True,
+        "seed": 0,
+    }
+    result = await flash_qwen2.generate("Test request", **params)
+    assert result.details.generated_tokens == 10
+    assert result == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_qwen2_load(flash_qwen2, generate_load, response_snapshot):
+    """Request the same prompt four times; every reply must be identical."""
+    responses = await generate_load(flash_qwen2, "Test request", max_new_tokens=10, n=4)
+
+    assert len(responses) == 4
+    texts = [r.generated_text for r in responses]
+    assert all(text == texts[0] for text in texts), f"{texts}"
+    # Must agree with test_flash_qwen2_load.json, which records the text below;
+    # the earlier literal ": Let n = 10 - 1" contradicted that snapshot.
+    assert texts[0] == " for the following function\n\nInputs: def find_max"
+
+    assert responses == response_snapshot