fix: prefer seed 1 in all cases

This commit is contained in:
drbh 2024-02-22 18:51:02 +00:00
parent ac5a1c6f51
commit d2635dd01b
17 changed files with 96 additions and 94 deletions

View File

@@ -60,69 +60,70 @@
"text": " d'abord" "text": " d'abord"
} }
], ],
"seed": 0, "seed": 1,
"tokens": [ "tokens": [
{ {
"id": 578, "id": 2997,
"logprob": -1.6591797, "logprob": -4.4101562,
"special": false, "special": false,
"text": " le" "text": " vous"
}, },
{ {
"id": 5608, "id": 71256,
"logprob": -2.4492188, "logprob": -5.3828125,
"special": false, "special": false,
"text": " faire" "text": " retrouver"
}, },
{ {
"id": 159570, "id": 693,
"logprob": -6.6835938, "logprob": -2.1308594,
"special": false, "special": false,
"text": " réch" "text": " à"
}, },
{ {
"id": 810, "id": 366,
"logprob": 0.0, "logprob": -1.5234375,
"special": false, "special": false,
"text": "au" "text": " la"
}, },
{ {
"id": 12736, "id": 221398,
"logprob": 0.0, "logprob": -2.671875,
"special": false, "special": false,
"text": "ffer" "text": " terrasse"
}, },
{ {
"id": 1742, "id": 1375,
"logprob": -2.5175781, "logprob": -4.375,
"special": false, "special": false,
"text": " au" "text": " pour"
}, },
{ {
"id": 6105, "id": 86887,
"logprob": -2.0078125, "logprob": -4.859375,
"special": false, "special": false,
"text": " bain" "text": " essayer"
}, },
{ {
"id": 88254, "id": 2155,
"logprob": -0.12695312, "logprob": -2.7519531,
"special": false, "special": false,
"text": "-mar" "text": " ce"
}, },
{ {
"id": 641, "id": 5743,
"logprob": 0.0, "logprob": -3.1992188,
"special": false, "special": false,
"text": "ie" "text": " jus"
}, },
{ {
"id": 2940, "id": 1479,
"logprob": -3.5175781, "logprob": -3.3203125,
"special": false, "special": false,
"text": " avec" "text": " qui"
} }
] ],
"top_tokens": null
}, },
"generated_text": " le faire réchauffer au bain-marie avec" "generated_text": " vous retrouver à la terrasse pour essayer ce jus qui"
} }

View File

@@ -30,7 +30,7 @@
"text": " d'abord" "text": " d'abord"
} }
], ],
"seed": 0, "seed": 1,
"tokens": [ "tokens": [
{ {
"id": 408, "id": 408,
@@ -38,61 +38,62 @@
"special": false, "special": false,
"text": " que" "text": " que"
}, },
{
"id": 366,
"logprob": -1.2939453,
"special": false,
"text": " la"
},
{
"id": 8769,
"logprob": -0.3708496,
"special": false,
"text": " personne"
},
{
"id": 1479,
"logprob": -2.2871094,
"special": false,
"text": " qui"
},
{ {
"id": 2997, "id": 2997,
"logprob": -0.8671875, "logprob": -1.2939453,
"special": false, "special": false,
"text": " vous" "text": " vous"
}, },
{ {
"id": 35977, "id": 116938,
"logprob": -1.5097656, "logprob": -0.8618164,
"special": false, "special": false,
"text": " suit" "text": " ayez"
},
{
"id": 21558,
"logprob": -0.07891846,
"special": false,
"text": " ait"
}, },
{ {
"id": 447, "id": 447,
"logprob": -0.12695312, "logprob": 0.0,
"special": false, "special": false,
"text": " un" "text": " un"
}, },
{ {
"id": 78606, "id": 11299,
"logprob": -2.21875, "logprob": -0.20141602,
"special": false, "special": false,
"text": " profil" "text": " compte"
}, },
{ {
"id": 3899, "id": 198236,
"logprob": -1.3535156, "logprob": -0.4741211,
"special": false, "special": false,
"text": " bien" "text": " PayPal"
},
{
"id": 17,
"logprob": 0.0,
"special": false,
"text": "."
},
{
"id": 12424,
"logprob": -0.6040039,
"special": false,
"text": " Pour"
},
{
"id": 11676,
"logprob": -0.9741211,
"special": false,
"text": " cela"
},
{
"id": 915,
"logprob": -0.31323242,
"special": false,
"text": " :"
} }
] ],
"top_tokens": null
}, },
"generated_text": "Pour déguster un ortolan, il faut tout d'abord que la personne qui vous suit ait un profil bien" "generated_text": "Pour déguster un ortolan, il faut tout d'abord que vous ayez un compte PayPal. Pour cela :"
} }

View File

@@ -20,7 +20,7 @@ async def test_bloom_560m(bloom_560, response_snapshot):
max_new_tokens=10, max_new_tokens=10,
top_p=0.9, top_p=0.9,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10
@@ -42,7 +42,7 @@ async def test_bloom_560m_all_params(bloom_560, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -20,7 +20,7 @@ async def test_bloom_560m_sharded(bloom_560m_sharded, response_snapshot):
max_new_tokens=10, max_new_tokens=10,
top_p=0.9, top_p=0.9,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -45,7 +45,7 @@ async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -42,7 +42,7 @@ async def test_flash_falcon_all_params(flash_falcon, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -13,7 +13,7 @@ async def flash_gemma(flash_gemma_handle):
return flash_gemma_handle.client return flash_gemma_handle.client
@pytest.mark.skip # @pytest.mark.skip
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.private @pytest.mark.private
async def test_flash_gemma(flash_gemma, response_snapshot): async def test_flash_gemma(flash_gemma, response_snapshot):
@@ -25,7 +25,7 @@ async def test_flash_gemma(flash_gemma, response_snapshot):
assert response == response_snapshot assert response == response_snapshot
@pytest.mark.skip # @pytest.mark.skip
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.private @pytest.mark.private
async def test_flash_gemma_all_params(flash_gemma, response_snapshot): async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
@@ -42,14 +42,14 @@ async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10
assert response == response_snapshot assert response == response_snapshot
@pytest.mark.skip # @pytest.mark.skip
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.private @pytest.mark.private
async def test_flash_gemma_load(flash_gemma, generate_load, response_snapshot): async def test_flash_gemma_load(flash_gemma, generate_load, response_snapshot):

View File

@@ -40,7 +40,7 @@ async def test_flash_llama_all_params(flash_llama, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 5 assert response.details.generated_tokens == 5

View File

@@ -39,7 +39,7 @@ async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -38,7 +38,7 @@ async def test_flash_medusa_all_params(flash_medusa, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -39,7 +39,7 @@ async def test_flash_mistral_all_params(flash_mistral, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -39,7 +39,7 @@ async def test_flash_phi_all_params(flash_phi, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 6 assert response.details.generated_tokens == 6

View File

@@ -33,7 +33,7 @@ async def test_flash_starcoder_default_params(flash_starcoder, response_snapshot
temperature=0.2, temperature=0.2,
top_p=0.95, top_p=0.95,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 60 assert response.details.generated_tokens == 60

View File

@@ -34,7 +34,7 @@ async def test_flash_starcoder_gptq_default_params(
temperature=0.2, temperature=0.2,
top_p=0.95, top_p=0.95,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 20 assert response.details.generated_tokens == 20
assert response == generous_response_snapshot assert response == generous_response_snapshot

View File

@@ -34,7 +34,7 @@ async def test_flash_llama_grammar_regex(flash_llama_grammar, response_snapshot)
"Whats Googles DNS", "Whats Googles DNS",
max_new_tokens=10, max_new_tokens=10,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
grammar={ grammar={
"type": GrammarType.Regex, # "regex" "type": GrammarType.Regex, # "regex"
"value": "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)", "value": "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)",
@@ -52,7 +52,7 @@ async def test_flash_llama_grammar_json(flash_llama_grammar, response_snapshot):
"info: david holtz like trees and has two cats. ", "info: david holtz like trees and has two cats. ",
max_new_tokens=100, max_new_tokens=100,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
grammar={ grammar={
"type": GrammarType.Json, # "json" "type": GrammarType.Json, # "json"
"value": json.dumps( "value": json.dumps(
@@ -104,7 +104,7 @@ async def test_flash_llama_grammar_load(
max_new_tokens=10, max_new_tokens=10,
n=4, n=4,
stop_sequences=[".com"], stop_sequences=[".com"],
seed=0, seed=1,
grammar={ grammar={
"type": GrammarType.Regex, # "regex" "type": GrammarType.Regex, # "regex"
"value": "[\\w-]+@([\\w-]+\\.)+[\\w-]+", # email regex "value": "[\\w-]+@([\\w-]+\\.)+[\\w-]+", # email regex
@@ -133,7 +133,7 @@ async def test_flash_llama_grammar_single_load_instance(
"name: david. email: ", "name: david. email: ",
max_new_tokens=10, max_new_tokens=10,
stop_sequences=[".com"], stop_sequences=[".com"],
seed=0, seed=1,
grammar={ grammar={
"type": GrammarType.Regex, # "regex" "type": GrammarType.Regex, # "regex"
"value": "[\\w-]+@([\\w-]+\\.)+[\\w-]+", # email regex "value": "[\\w-]+@([\\w-]+\\.)+[\\w-]+", # email regex

View File

@@ -39,7 +39,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 10 assert response.details.generated_tokens == 10

View File

@@ -20,7 +20,7 @@ async def test_mt0_base(mt0_base, response_snapshot):
max_new_tokens=10, max_new_tokens=10,
top_p=0.9, top_p=0.9,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 5 assert response.details.generated_tokens == 5
@@ -42,7 +42,7 @@ async def test_mt0_base_all_params(mt0_base, response_snapshot):
typical_p=0.9, typical_p=0.9,
watermark=True, watermark=True,
decoder_input_details=True, decoder_input_details=True,
seed=0, seed=1,
) )
assert response.details.generated_tokens == 9 assert response.details.generated_tokens == 9