Updating mllama after strftime. (#2993)

* Updating mllama after strftime.
* Town instead of village.
* Forgot the integration snapshot.
* Attempt to fix intel CPU.
* Intel extension fix.
* Workaround intel.
* Moving those deps directly into pyproject.
* Revert "Moving those deps directly into pyproject." This reverts commit 98c1496ea6.
* Non system uv.
* Fixing the docker environment hopefully.
* Missed a step.
* Move workdir up a bit.
* Bailing out of reproducible python env.
* Triton version.
2025-09-08 19:04:52 +00:00 · 2025-02-07 10:38:13 +01:00 · 2025-02-07 10:38:13 +01:00 · 4b8cda684b
commit 4b8cda684b
parent 856709d5c3
4 changed files with 20 additions and 83 deletions
--- a/16
+++ b/16
@ -215,16 +215,9 @@ COPY server server
 COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
    make gen-server && \
    pip install -U pip uv && \
-	uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
+    uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
    . ./.venv/bin/activate && \
    make gen-server-raw
 RUN cd server && \
 	uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
    . ./.venv/bin/activate && \
    pwd && \
    text-generation-server --help
 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@ -238,8 +231,5 @@ ENV ATTENTION=flashdecoding-ipex
 ENV PREFIX_CACHING=1
 ENV PREFILL_CHUNKING=1
 ENV CUDA_GRAPHS=0
-COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
+ENTRYPOINT ["text-generation-launcher"]
 RUN chmod +x /tgi-entrypoint.sh
 ENTRYPOINT ["/tgi-entrypoint.sh"]
 CMD ["--json-output"]
--- a/integration-tests/models/snapshots/test_mllama/test_mllama_load.json
+++ b/integration-tests/models/snapshots/test_mllama/test_mllama_load.json
@ -6,7 +6,7 @@
        "index": 0,
        "logprobs": null,
        "message": {
-          "content": "In a bustling city, a chicken named Cluck",
+          "content": "In a small town, a chicken named Cluck",
          "name": null,
          "role": "assistant",
          "tool_calls": null
@ -14,11 +14,11 @@
        "usage": null
      }
    ],
-    "created": 1727773835,
+    "created": 1738753835,
    "id": "",
    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
+    "system_fingerprint": "3.1.1-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "prompt_tokens": 50,
@ -32,7 +32,7 @@
        "index": 0,
        "logprobs": null,
        "message": {
-          "content": "In a world where even chickens could dream big,",
+          "content": "In a small town, a chicken named Cluck",
          "name": null,
          "role": "assistant",
          "tool_calls": null
@ -40,63 +40,11 @@
        "usage": null
      }
    ],
-    "created": 1727773835,
+    "created": 1738753835,
    "id": "",
    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
+    "system_fingerprint": "3.1.1-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "prompt_tokens": 50,
      "total_tokens": 60
    }
  },
  {
    "choices": [
      {
        "finish_reason": "length",
        "index": 0,
        "logprobs": null,
        "message": {
          "content": "In a world where even chickens could dream big,",
          "name": null,
          "role": "assistant",
          "tool_calls": null
        },
        "usage": null
      }
    ],
    "created": 1727773835,
    "id": "",
    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
    "system_fingerprint": "2.4.2-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "prompt_tokens": 50,
      "total_tokens": 60
    }
  },
  {
    "choices": [
      {
        "finish_reason": "length",
        "index": 0,
        "logprobs": null,
        "message": {
          "content": "In a world where even chickens could dream big,",
          "name": null,
          "role": "assistant",
          "tool_calls": null
        },
        "usage": null
      }
    ],
    "created": 1727773835,
    "id": "",
    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
    "system_fingerprint": "2.4.2-dev0-native",
    "usage": {
      "completion_tokens": 10,
      "prompt_tokens": 50,
--- a/integration-tests/models/snapshots/test_mllama/test_mllama_simpl.json
+++ b/integration-tests/models/snapshots/test_mllama/test_mllama_simpl.json
@ -5,7 +5,7 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "In a bustling city, a chicken named Cluck",
+        "content": "In a small town, a chicken named Cluck",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@ -13,11 +13,11 @@
      "usage": null
    }
  ],
-  "created": 1727556016,
+  "created": 1738753833,
  "id": "",
  "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
  "object": "chat.completion",
-  "system_fingerprint": "2.4.2-dev0-native",
+  "system_fingerprint": "3.1.1-dev0-native",
  "usage": {
    "completion_tokens": 10,
    "prompt_tokens": 50,
--- a/integration-tests/models/test_mllama.py
+++ b/integration-tests/models/test_mllama.py
@ -47,8 +47,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
        "total_tokens": 60,
    }
    assert (
-        response.choices[0].message.content
+        response.choices[0].message.content == "In a small town, a chicken named Cluck"
        == "In a bustling city, a chicken named Cluck"
    )
    assert response == response_snapshot
@ -84,12 +83,12 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
    ]
    responses = await asyncio.gather(*futures)
-    _ = [response.choices[0].message.content for response in responses]
+    generated_texts = [response.choices[0].message.content for response in responses]
    # XXX: TODO: Fix this test.
-    # assert generated_texts[0] == "In a bustling city, a chicken named Cluck"
+    assert generated_texts[0] == "In a small town, a chicken named Cluck"
-    # assert len(generated_texts) == 4
+    assert len(generated_texts) == 2
-    # assert generated_texts, all(
+    assert generated_texts, all(
-    #     [text == generated_texts[0] for text in generated_texts]
+        [text == generated_texts[0] for text in generated_texts]
-    # )
+    )
-    # assert responses == response_snapshot
+    assert responses == response_snapshot