diff --git a/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_non_flash_llama_grammar_json.json b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_non_flash_llama_grammar_json.json deleted file mode 100644 index 049d403b..00000000 --- a/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_non_flash_llama_grammar_json.json +++ /dev/null @@ -1,274 +0,0 @@ -{ - "details": { - "best_of_sequences": null, - "finish_reason": "eos_token", - "generated_tokens": 30, - "prefill": [ - { - "id": 1, - "logprob": null, - "text": "" - }, - { - "id": 5235, - "logprob": -10.061389, - "text": "info" - }, - { - "id": 29901, - "logprob": -3.2349052, - "text": ":" - }, - { - "id": 13260, - "logprob": -10.626516, - "text": "dav" - }, - { - "id": 333, - "logprob": -0.08372568, - "text": "id" - }, - { - "id": 8753, - "logprob": -7.5279083, - "text": "hol" - }, - { - "id": 17559, - "logprob": -3.8427715, - "text": "tz" - }, - { - "id": 763, - "logprob": -10.143592, - "text": "like" - }, - { - "id": 10697, - "logprob": -10.200588, - "text": "trees" - }, - { - "id": 322, - "logprob": -2.5744739, - "text": "and" - }, - { - "id": 756, - "logprob": -7.4822097, - "text": "has" - }, - { - "id": 1023, - "logprob": -5.043413, - "text": "two" - }, - { - "id": 274, - "logprob": -5.326814, - "text": "c" - }, - { - "id": 1446, - "logprob": -0.67299384, - "text": "ats" - }, - { - "id": 29889, - "logprob": -0.999048, - "text": "." - }, - { - "id": 29871, - "logprob": -4.2404404, - "text": "" - } - ], - "seed": null, - "tokens": [ - { - "id": 6377, - "logprob": -0.1497998, - "special": false, - "text": "{\"" - }, - { - "id": 29888, - "logprob": -0.1359236, - "special": false, - "text": "f" - }, - { - "id": 12935, - "logprob": -0.01771052, - "special": false, - "text": "irs" - }, - { - "id": 29873, - "logprob": -0.00084543246, - "special": false, - "text": "t" - }, - { - "id": 1170, - "logprob": -0.0053624124, - "special": false, - "text": "Name" - }, - { - "id": 4710, - "logprob": -0.13352497, - "special": false, - "text": "\":\"" - }, - { - "id": 19504, - "logprob": -0.8816582, - "special": false, - "text": "David" - }, - { - "id": 3284, - "logprob": -0.1636697, - "special": false, - "text": "\",\"" - }, - { - "id": 29882, - "logprob": -0.08828322, - "special": false, - "text": "h" - }, - { - "id": 711, - "logprob": -0.66238964, - "special": false, - "text": "ob" - }, - { - "id": 1609, - "logprob": -5.566919e-05, - "special": false, - "text": "by" - }, - { - "id": 4710, - "logprob": -0.2296004, - "special": false, - "text": "\":\"" - }, - { - "id": 29911, - "logprob": -2.3745353, - "special": false, - "text": "T" - }, - { - "id": 11003, - "logprob": -0.032119535, - "special": false, - "text": "rees" - }, - { - "id": 3284, - "logprob": -0.22055298, - "special": false, - "text": "\",\"" - }, - { - "id": 4230, - "logprob": -0.067228675, - "special": false, - "text": "last" - }, - { - "id": 1170, - "logprob": -0.0035023084, - "special": false, - "text": "Name" - }, - { - "id": 4710, - "logprob": -0.004494921, - "special": false, - "text": "\":\"" - }, - { - "id": 29950, - "logprob": -0.12524654, - "special": false, - "text": "H" - }, - { - "id": 14339, - "logprob": -0.009601957, - "special": false, - "text": "olt" - }, - { - "id": 29920, - "logprob": -0.00041619223, - "special": false, - "text": "z" - }, - { - "id": 3284, - "logprob": -0.116980776, - "special": false, - "text": "\",\"" - }, - { - "id": 29876, - "logprob": -0.2994127, - "special": false, - "text": "n" - }, - { - "id": 398, - "logprob": -0.0030563807, - "special": false, - "text": "um" - }, - { - "id": 29907, - "logprob": -0.37736154, - "special": false, - "text": "C" - }, - { - "id": 1446, - "logprob": -0.00031073033, - "special": false, - "text": "ats" - }, - { - "id": 1115, - "logprob": -0.0021851014, - "special": false, - "text": "\":" - }, - { - "id": 29906, - "logprob": -0.07180126, - "special": false, - "text": "2" - }, - { - "id": 29913, - "logprob": -0.018707855, - "special": false, - "text": "}" - }, - { - "id": 2, - "logprob": 0.0, - "special": true, - "text": "" - } - ], - "top_tokens": null - }, - "generated_text": "{\"firstName\":\"David\",\"hobby\":\"Trees\",\"lastName\":\"Holtz\",\"numCats\":2}" -} diff --git a/integration-tests/models/test_flash_grammar_llama.py b/integration-tests/models/test_flash_grammar_llama.py index 59e9774b..585d0656 100644 --- a/integration-tests/models/test_flash_grammar_llama.py +++ b/integration-tests/models/test_flash_grammar_llama.py @@ -144,68 +144,3 @@ async def test_flash_llama_grammar_single_load_instance( assert response.generated_text == "123456@gmail.com" assert response == response_snapshot - - -@pytest.fixture(scope="module") -def non_flash_llama_grammar_handle(launcher): - with launcher( - "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - num_shard=1, - disable_grammar_support=False, - use_flash_attention=False, - ) as handle: - yield handle - - -@pytest.fixture(scope="module") -async def non_flash_llama_grammar(non_flash_llama_grammar_handle): - await non_flash_llama_grammar_handle.health(300) - return non_flash_llama_grammar_handle.client - - -@pytest.mark.asyncio -async def test_non_flash_llama_grammar_json(non_flash_llama_grammar, response_snapshot): - response = await non_flash_llama_grammar.generate( - "info: david holtz like trees and has two cats. ", - max_new_tokens=100, - decoder_input_details=True, - seed=0, - grammar={ - "type": GrammarType.Json, - "value": json.dumps( - { - "type": "object", - "$id": "https://example.com/person.schema.json", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "Person", - "properties": { - "firstName": { - "type": "string", - "description": "The person'''s first name.", - }, - "lastName": { - "type": "string", - "description": "The person'''s last name.", - }, - "hobby": { - "description": "The person'''s hobby.", - "type": "string", - }, - "numCats": { - "description": "The number of cats the person has.", - "type": "integer", - "minimum": 0, - }, - }, - "required": ["firstName", "lastName", "hobby", "numCats"], - } - ), - }, - ) - - assert response.details.generated_tokens == 30 - assert ( - response.generated_text - == '{"firstName":"David","hobby":"Trees","lastName":"Holtz","numCats":2}' - ) - assert response == response_snapshot