Making sure Olmo (transformers backend) works.

2025-09-09 19:34:53 +00:00 · 2025-03-05 12:24:25 +01:00 · 2025-03-05 12:24:25 +01:00 · d8e93a1baa
commit d8e93a1baa
parent 31766dad77
3 changed files with 404 additions and 0 deletions
--- a/integration-tests/models/snapshots/test_transformers_olmo/test_flash_llama_load.json
+++ b/integration-tests/models/snapshots/test_transformers_olmo/test_flash_llama_load.json
@ -0,0 +1,294 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 27,
+          "logprob": -1.3457031,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 187,
+          "logprob": -1.453125,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -1.4111328,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 11202,
+          "logprob": -0.45898438,
+          "special": false,
+          "text": "```"
+        },
+        {
+          "id": 8456,
+          "logprob": -0.41918945,
+          "special": false,
+          "text": "json"
+        },
+        {
+          "id": 187,
+          "logprob": -0.003189087,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 92,
+          "logprob": -0.061187744,
+          "special": false,
+          "text": "{"
+        },
+        {
+          "id": 187,
+          "logprob": -0.009010315,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50276,
+          "logprob": -0.484375,
+          "special": false,
+          "text": "  "
+        },
+        {
+          "id": 3,
+          "logprob": -0.0002951622,
+          "special": false,
+          "text": "\""
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ":\n\n```json\n{\n  \""
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 27,
+          "logprob": -1.3457031,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 187,
+          "logprob": -1.453125,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -1.4111328,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 11202,
+          "logprob": -0.45898438,
+          "special": false,
+          "text": "```"
+        },
+        {
+          "id": 8456,
+          "logprob": -0.41918945,
+          "special": false,
+          "text": "json"
+        },
+        {
+          "id": 187,
+          "logprob": -0.003189087,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 92,
+          "logprob": -0.061187744,
+          "special": false,
+          "text": "{"
+        },
+        {
+          "id": 187,
+          "logprob": -0.009010315,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50276,
+          "logprob": -0.484375,
+          "special": false,
+          "text": "  "
+        },
+        {
+          "id": 3,
+          "logprob": -0.0002951622,
+          "special": false,
+          "text": "\""
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ":\n\n```json\n{\n  \""
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 27,
+          "logprob": -1.3457031,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 187,
+          "logprob": -1.453125,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -1.4111328,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 11202,
+          "logprob": -0.45898438,
+          "special": false,
+          "text": "```"
+        },
+        {
+          "id": 8456,
+          "logprob": -0.41918945,
+          "special": false,
+          "text": "json"
+        },
+        {
+          "id": 187,
+          "logprob": -0.003189087,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 92,
+          "logprob": -0.061187744,
+          "special": false,
+          "text": "{"
+        },
+        {
+          "id": 187,
+          "logprob": -0.009010315,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50276,
+          "logprob": -0.484375,
+          "special": false,
+          "text": "  "
+        },
+        {
+          "id": 3,
+          "logprob": -0.0002951622,
+          "special": false,
+          "text": "\""
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ":\n\n```json\n{\n  \""
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 27,
+          "logprob": -1.3457031,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 187,
+          "logprob": -1.453125,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -1.4111328,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 11202,
+          "logprob": -0.45898438,
+          "special": false,
+          "text": "```"
+        },
+        {
+          "id": 8456,
+          "logprob": -0.41918945,
+          "special": false,
+          "text": "json"
+        },
+        {
+          "id": 187,
+          "logprob": -0.003189087,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 92,
+          "logprob": -0.061187744,
+          "special": false,
+          "text": "{"
+        },
+        {
+          "id": 187,
+          "logprob": -0.009010315,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50276,
+          "logprob": -0.484375,
+          "special": false,
+          "text": "  "
+        },
+        {
+          "id": 3,
+          "logprob": -0.0002951622,
+          "special": false,
+          "text": "\""
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ":\n\n```json\n{\n  \""
+  }
+]
--- a/integration-tests/models/snapshots/test_transformers_olmo/test_flash_llama_simple.json
+++ b/integration-tests/models/snapshots/test_transformers_olmo/test_flash_llama_simple.json
@ -0,0 +1,73 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 27,
+        "logprob": -1.3457031,
+        "special": false,
+        "text": ":"
+      },
+      {
+        "id": 187,
+        "logprob": -1.4580078,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 187,
+        "logprob": -1.4472656,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 11202,
+        "logprob": -0.46044922,
+        "special": false,
+        "text": "```"
+      },
+      {
+        "id": 8456,
+        "logprob": -0.4206543,
+        "special": false,
+        "text": "json"
+      },
+      {
+        "id": 187,
+        "logprob": -0.0031471252,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 92,
+        "logprob": -0.061187744,
+        "special": false,
+        "text": "{"
+      },
+      {
+        "id": 187,
+        "logprob": -0.009033203,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 50276,
+        "logprob": -0.48461914,
+        "special": false,
+        "text": "  "
+      },
+      {
+        "id": 3,
+        "logprob": -0.0002901554,
+        "special": false,
+        "text": "\""
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": ":\n\n```json\n{\n  \""
+}
--- a/integration-tests/models/test_transformers_olmo.py
+++ b/integration-tests/models/test_transformers_olmo.py
@ -0,0 +1,37 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_llama_handle(launcher):
+    """Launch the OLMo checkpoint once per module and yield its handle.
+
+    ``launcher`` is entered as a context manager with ``num_shard=2``; the
+    handle it produces is yielded to dependent fixtures, and the context is
+    exited once the module-scoped fixture is torn down.
+    """
+    model_id = "allenai/OLMo-7B-0724-Instruct-hf"
+    with launcher(model_id, num_shard=2) as launched:
+        yield launched
+
+
+@pytest.fixture(scope="module")
+async def flash_llama(flash_llama_handle):
+    """Await the handle's ``health`` check, then return its client.
+
+    NOTE(review): the ``300`` passed to ``health`` is presumably a timeout in
+    seconds -- confirm against the launcher implementation.
+    """
+    handle = flash_llama_handle
+    await handle.health(300)
+    return handle.client
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_simple(flash_llama, response_snapshot):
+    """Generate 10 new tokens for a fixed prompt and compare with the snapshot.
+
+    NOTE(review): the ``flash_llama`` naming is kept even though the model
+    under test is OLMo; the snapshot files added alongside this module carry
+    the same test names, so a rename would have to touch them as well.
+    """
+    expected_text = ':\n\n```json\n{\n  "'
+
+    result = await flash_llama.generate(
+        "Test request",
+        max_new_tokens=10,
+        decoder_input_details=True,
+    )
+
+    details = result.details
+    assert details.generated_tokens == 10
+    assert result.generated_text == expected_text
+    assert result == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_load(flash_llama, generate_load, response_snapshot):
+    """Send the same prompt four times and check all responses agree.
+
+    ``generate_load`` is called with ``n=4``; the test asserts that four
+    responses come back, that the first has the expected text, that every
+    response has the same text as the first, and that the whole list matches
+    the stored snapshot.
+    """
+    expected_text = ':\n\n```json\n{\n  "'
+
+    responses = await generate_load(flash_llama, "Test request", max_new_tokens=10, n=4)
+
+    assert len(responses) == 4
+    first_text = responses[0].generated_text
+    assert first_text == expected_text
+    # Generator expression: all() needs no intermediate list and can stop at
+    # the first mismatch.
+    assert all(r.generated_text == first_text for r in responses)
+
+    assert responses == response_snapshot