From d8e93a1baa2f831625cbc35f147d35fd2ea81736 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 5 Mar 2025 12:24:25 +0100 Subject: [PATCH] Making sure Olmo (transformers backend) works. --- .../test_flash_llama_load.json | 294 ++++++++++++++++++ .../test_flash_llama_simple.json | 73 +++++ .../models/test_transformers_olmo.py | 37 +++ 3 files changed, 404 insertions(+) create mode 100644 integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_load.json create mode 100644 integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_simple.json create mode 100644 integration-tests/models/test_transformers_olmo.py diff --git a/integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_load.json b/integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_load.json new file mode 100644 index 00000000..ed15bc0b --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_load.json @@ -0,0 +1,294 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "seed": null, + "tokens": [ + { + "id": 27, + "logprob": -1.3457031, + "special": false, + "text": ":" + }, + { + "id": 187, + "logprob": -1.453125, + "special": false, + "text": "\n" + }, + { + "id": 187, + "logprob": -1.4111328, + "special": false, + "text": "\n" + }, + { + "id": 11202, + "logprob": -0.45898438, + "special": false, + "text": "```" + }, + { + "id": 8456, + "logprob": -0.41918945, + "special": false, + "text": "json" + }, + { + "id": 187, + "logprob": -0.003189087, + "special": false, + "text": "\n" + }, + { + "id": 92, + "logprob": -0.061187744, + "special": false, + "text": "{" + }, + { + "id": 187, + "logprob": -0.009010315, + "special": false, + "text": "\n" + }, + { + "id": 50276, + "logprob": -0.484375, + "special": false, + "text": " " + }, + { + "id": 3, + "logprob": -0.0002951622, + "special": false, + "text": "\"" + } + ], + "top_tokens": null + }, + "generated_text": ":\n\n```json\n{\n \"" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "seed": null, + "tokens": [ + { + "id": 27, + "logprob": -1.3457031, + "special": false, + "text": ":" + }, + { + "id": 187, + "logprob": -1.453125, + "special": false, + "text": "\n" + }, + { + "id": 187, + "logprob": -1.4111328, + "special": false, + "text": "\n" + }, + { + "id": 11202, + "logprob": -0.45898438, + "special": false, + "text": "```" + }, + { + "id": 8456, + "logprob": -0.41918945, + "special": false, + "text": "json" + }, + { + "id": 187, + "logprob": -0.003189087, + "special": false, + "text": "\n" + }, + { + "id": 92, + "logprob": -0.061187744, + "special": false, + "text": "{" + }, + { + "id": 187, + "logprob": -0.009010315, + "special": false, + "text": "\n" + }, + { + "id": 50276, + "logprob": -0.484375, + "special": false, + "text": " " + }, + { + "id": 3, + "logprob": -0.0002951622, + "special": false, + "text": "\"" + } + ], + "top_tokens": null + }, + "generated_text": ":\n\n```json\n{\n \"" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "seed": null, + "tokens": [ + { + "id": 27, + "logprob": -1.3457031, + "special": false, + "text": ":" + }, + { + "id": 187, + "logprob": -1.453125, + "special": false, + "text": "\n" + }, + { + "id": 187, + "logprob": -1.4111328, + "special": false, + "text": "\n" + }, + { + "id": 11202, + "logprob": -0.45898438, + "special": false, + "text": "```" + }, + { + "id": 8456, + "logprob": -0.41918945, + "special": false, + "text": "json" + }, + { + "id": 187, + "logprob": -0.003189087, + "special": false, + "text": "\n" + }, + { + "id": 92, + "logprob": -0.061187744, + "special": false, + "text": "{" + }, + { + "id": 187, + "logprob": -0.009010315, + "special": false, + "text": "\n" + }, + { + "id": 50276, + "logprob": -0.484375, + "special": false, + "text": " " + }, + { + "id": 3, + "logprob": -0.0002951622, + "special": false, + "text": "\"" + } + ], + "top_tokens": null + }, + "generated_text": ":\n\n```json\n{\n \"" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "seed": null, + "tokens": [ + { + "id": 27, + "logprob": -1.3457031, + "special": false, + "text": ":" + }, + { + "id": 187, + "logprob": -1.453125, + "special": false, + "text": "\n" + }, + { + "id": 187, + "logprob": -1.4111328, + "special": false, + "text": "\n" + }, + { + "id": 11202, + "logprob": -0.45898438, + "special": false, + "text": "```" + }, + { + "id": 8456, + "logprob": -0.41918945, + "special": false, + "text": "json" + }, + { + "id": 187, + "logprob": -0.003189087, + "special": false, + "text": "\n" + }, + { + "id": 92, + "logprob": -0.061187744, + "special": false, + "text": "{" + }, + { + "id": 187, + "logprob": -0.009010315, + "special": false, + "text": "\n" + }, + { + "id": 50276, + "logprob": -0.484375, + "special": false, + "text": " " + }, + { + "id": 3, + "logprob": -0.0002951622, + "special": false, + "text": "\"" + } + ], + "top_tokens": null + }, + "generated_text": ":\n\n```json\n{\n \"" + } +] diff --git a/integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_simple.json b/integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_simple.json new file mode 100644 index 00000000..695244cd --- /dev/null +++ b/integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_simple.json @@ -0,0 +1,73 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "seed": null, + "tokens": [ + { + "id": 27, + "logprob": -1.3457031, + "special": false, + "text": ":" + }, + { + "id": 187, + "logprob": -1.4580078, + "special": false, + "text": "\n" + }, + { + "id": 187, + "logprob": -1.4472656, + "special": false, + "text": "\n" + }, + { + "id": 11202, + "logprob": -0.46044922, + "special": false, + "text": "```" + }, + { + "id": 8456, + "logprob": -0.4206543, + "special": false, + "text": "json" + }, + { + "id": 187, + "logprob": -0.0031471252, + "special": false, + "text": "\n" + }, + { + "id": 92, + "logprob": -0.061187744, + "special": false, + "text": "{" + }, + { + "id": 187, + "logprob": -0.009033203, + "special": false, + "text": "\n" + }, + { + "id": 50276, + "logprob": -0.48461914, + "special": false, + "text": " " + }, + { + "id": 3, + "logprob": -0.0002901554, + "special": false, + "text": "\"" + } + ], + "top_tokens": null + }, + "generated_text": ":\n\n```json\n{\n \"" +} diff --git a/integration-tests/models/test_transformers_olmo.py b/integration-tests/models/test_transformers_olmo.py new file mode 100644 index 00000000..53bccafa --- /dev/null +++ b/integration-tests/models/test_transformers_olmo.py @@ -0,0 +1,37 @@ +import pytest + + +@pytest.fixture(scope="module") +def flash_llama_handle(launcher): + with launcher("allenai/OLMo-7B-0724-Instruct-hf", num_shard=2) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_llama(flash_llama_handle): + await flash_llama_handle.health(300) + return flash_llama_handle.client + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_llama_simple(flash_llama, response_snapshot): + response = await flash_llama.generate( + "Test request", max_new_tokens=10, decoder_input_details=True + ) + + assert response.details.generated_tokens == 10 + assert response.generated_text == ':\n\n```json\n{\n "' + assert response == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_llama_load(flash_llama, generate_load, response_snapshot): + responses = await generate_load(flash_llama, "Test request", max_new_tokens=10, n=4) + + assert len(responses) == 4 + assert responses[0].generated_text == ':\n\n```json\n{\n "' + assert all([r.generated_text == responses[0].generated_text for r in responses]) + + assert responses == response_snapshot