Updating mllama after strftime. (#2993)

* Updating mllama after strftime.

* Town instead of village.

* Forgot the integration snapshot.

* Attempt to fix intel CPU.

* Intel extension fix.

* Workaround intel.

* Moving those deps directly into pyproject.

* Revert "Moving those deps directly into pyproject."

This reverts commit 98c1496ea6.

* Non system uv.

* Fixing the docker environment hopefully.

* Missed a step.

* Move workdir up a bit.

* Bailing out of reproducible python env.

* Triton version.
Nicolas Patry, 2025-02-07 10:38:13 +01:00, committed by GitHub
commit 4b8cda684b, parent 856709d5c3
4 changed files with 20 additions and 83 deletions
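Context for the title: the mllama (Llama 3.2 Vision) chat template can call strftime_now to stamp the current date into the system prompt, so once that callable is honored, the rendered prompt (and therefore the greedy generations captured in the snapshots below) changes. A minimal sketch of the mechanism, assuming a Jinja2 environment like the one transformers uses for chat templates; the template fragment is illustrative, not the real Llama 3.2 template:

from datetime import datetime

from jinja2 import Environment

def strftime_now(fmt: str) -> str:
    # Same contract as the strftime_now callable exposed to chat templates.
    return datetime.now().strftime(fmt)

env = Environment()
env.globals["strftime_now"] = strftime_now

# Illustrative fragment; the real template also interleaves special tokens.
template = env.from_string(
    "Cutting Knowledge Date: December 2023\n"
    "Today Date: {{ strftime_now('%d %b %Y') }}\n\n"
    "{{ message }}"
)

print(template.render(message="Tell me a story about a chicken."))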

@@ -215,16 +215,9 @@ COPY server server
 COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
     make gen-server && \
     pip install -U pip uv && \
-    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
-    . ./.venv/bin/activate && \
-    make gen-server-raw
-RUN cd server && \
-    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
-    . ./.venv/bin/activate && \
-    pwd && \
-    text-generation-server --help
+    uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir

 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -238,8 +231,5 @@ ENV ATTENTION=flashdecoding-ipex
 ENV PREFIX_CACHING=1
 ENV PREFILL_CHUNKING=1
 ENV CUDA_GRAPHS=0
-COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
-RUN chmod +x /tgi-entrypoint.sh
-ENTRYPOINT ["/tgi-entrypoint.sh"]
+ENTRYPOINT ["text-generation-launcher"]
+CMD ["--json-output"]

@@ -6,7 +6,7 @@
         "index": 0,
         "logprobs": null,
         "message": {
-          "content": "In a bustling city, a chicken named Cluck",
+          "content": "In a small town, a chicken named Cluck",
           "name": null,
           "role": "assistant",
           "tool_calls": null
@@ -14,11 +14,11 @@
         "usage": null
       }
     ],
-    "created": 1727773835,
+    "created": 1738753835,
     "id": "",
     "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
     "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
+    "system_fingerprint": "3.1.1-dev0-native",
     "usage": {
       "completion_tokens": 10,
       "prompt_tokens": 50,
@@ -32,7 +32,7 @@
         "index": 0,
         "logprobs": null,
         "message": {
-          "content": "In a world where even chickens could dream big,",
+          "content": "In a small town, a chicken named Cluck",
           "name": null,
           "role": "assistant",
           "tool_calls": null
@@ -40,63 +40,11 @@
         "usage": null
       }
     ],
-    "created": 1727773835,
+    "created": 1738753835,
     "id": "",
     "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
     "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
-    "usage": {
-      "completion_tokens": 10,
-      "prompt_tokens": 50,
-      "total_tokens": 60
-    }
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "length",
-        "index": 0,
-        "logprobs": null,
-        "message": {
-          "content": "In a world where even chickens could dream big,",
-          "name": null,
-          "role": "assistant",
-          "tool_calls": null
-        },
-        "usage": null
-      }
-    ],
-    "created": 1727773835,
-    "id": "",
-    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-    "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
-    "usage": {
-      "completion_tokens": 10,
-      "prompt_tokens": 50,
-      "total_tokens": 60
-    }
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "length",
-        "index": 0,
-        "logprobs": null,
-        "message": {
-          "content": "In a world where even chickens could dream big,",
-          "name": null,
-          "role": "assistant",
-          "tool_calls": null
-        },
-        "usage": null
-      }
-    ],
-    "created": 1727773835,
-    "id": "",
-    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-    "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
+    "system_fingerprint": "3.1.1-dev0-native",
     "usage": {
       "completion_tokens": 10,
       "prompt_tokens": 50,

@@ -5,7 +5,7 @@
       "index": 0,
       "logprobs": null,
       "message": {
-        "content": "In a bustling city, a chicken named Cluck",
+        "content": "In a small town, a chicken named Cluck",
         "name": null,
         "role": "assistant",
         "tool_calls": null
@@ -13,11 +13,11 @@
       "usage": null
     }
   ],
-  "created": 1727556016,
+  "created": 1738753833,
   "id": "",
   "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "2.4.2-dev0-native",
+  "system_fingerprint": "3.1.1-dev0-native",
   "usage": {
     "completion_tokens": 10,
     "prompt_tokens": 50,

@@ -47,8 +47,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
         "total_tokens": 60,
     }
     assert (
-        response.choices[0].message.content
-        == "In a bustling city, a chicken named Cluck"
+        response.choices[0].message.content == "In a small town, a chicken named Cluck"
     )
     assert response == response_snapshot
@@ -84,12 +83,12 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
     ]
     responses = await asyncio.gather(*futures)
-    _ = [response.choices[0].message.content for response in responses]
-    # XXX: TODO: Fix this test.
-    # assert generated_texts[0] == "In a bustling city, a chicken named Cluck"
-    # assert len(generated_texts) == 4
-    # assert generated_texts, all(
-    #     [text == generated_texts[0] for text in generated_texts]
-    # )
-    # assert responses == response_snapshot
+    generated_texts = [response.choices[0].message.content for response in responses]
+    assert generated_texts[0] == "In a small town, a chicken named Cluck"
+    assert len(generated_texts) == 2
+    assert generated_texts, all(
+        [text == generated_texts[0] for text in generated_texts]
+    )
+    assert responses == response_snapshot
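The re-enabled load test above encodes a determinism check: identical concurrent requests should produce identical outputs. A self-contained sketch of that pattern, with FakeClient standing in for the real TGI fixture:

import asyncio

class FakeClient:
    # Stand-in for the TGI chat client used by the mllama fixtures.
    async def generate(self, prompt: str) -> str:
        await asyncio.sleep(0)  # yield control, as a real network call would
        return "In a small town, a chicken named Cluck"

async def main() -> None:
    client = FakeClient()
    # Fire identical requests concurrently, mirroring asyncio.gather(*futures).
    texts = await asyncio.gather(
        *[client.generate("Tell me a story") for _ in range(2)]
    )
    assert len(texts) == 2
    # With greedy decoding, every request should yield the same text.
    assert all(text == texts[0] for text in texts)

asyncio.run(main())

Note the sketch asserts all(...) directly; the committed form, assert generated_texts, all(...), only asserts that the list is non-empty and uses the all(...) result as the assertion message.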