Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 13:52:07 +00:00)
Updating mllama after strftime. (#2993)
* Updating mllama after strftime.
* Town instead of village.
* Forgot the integration snapshot.
* Attempt to fix intel CPU.
* Intel extension fix.
* Workaround intel.
* Moving those deps directly into pyproject.
* Revert "Moving those deps directly into pyproject."
This reverts commit 98c1496ea6.
* Non system uv.
* Fixing the docker environment hopefully.
* Missed a step.
* Move workdir up a bit.
* Bailing out of reproducible python env.
* Triton version.
This commit is contained in:
parent: 856709d5c3
commit: 4b8cda684b
@ -215,16 +215,9 @@ COPY server server
|
||||
COPY server/Makefile server/Makefile
|
||||
ENV UV_SYSTEM_PYTHON=1
|
||||
RUN cd server && \
|
||||
make gen-server && \
|
||||
pip install -U pip uv && \
|
||||
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
|
||||
. ./.venv/bin/activate && \
|
||||
make gen-server-raw
|
||||
|
||||
RUN cd server && \
|
||||
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
|
||||
. ./.venv/bin/activate && \
|
||||
pwd && \
|
||||
text-generation-server --help
|
||||
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
|
||||
|
||||
# Install benchmarker
|
||||
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||
@ -238,8 +231,5 @@ ENV ATTENTION=flashdecoding-ipex
|
||||
ENV PREFIX_CACHING=1
|
||||
ENV PREFILL_CHUNKING=1
|
||||
ENV CUDA_GRAPHS=0
|
||||
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
|
||||
RUN chmod +x /tgi-entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/tgi-entrypoint.sh"]
|
||||
ENTRYPOINT ["text-generation-launcher"]
|
||||
CMD ["--json-output"]
|
||||
|
@ -6,7 +6,7 @@
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "In a bustling city, a chicken named Cluck",
|
||||
"content": "In a small town, a chicken named Cluck",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
@ -14,11 +14,11 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1727773835,
|
||||
"created": 1738753835,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "2.4.2-dev0-native",
|
||||
"system_fingerprint": "3.1.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 50,
|
||||
@ -32,7 +32,7 @@
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "In a world where even chickens could dream big,",
|
||||
"content": "In a small town, a chicken named Cluck",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
@ -40,63 +40,11 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1727773835,
|
||||
"created": 1738753835,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "2.4.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 50,
|
||||
"total_tokens": 60
|
||||
}
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "In a world where even chickens could dream big,",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1727773835,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "2.4.2-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 50,
|
||||
"total_tokens": 60
|
||||
}
|
||||
},
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "In a world where even chickens could dream big,",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1727773835,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "2.4.2-dev0-native",
|
||||
"system_fingerprint": "3.1.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 50,
|
||||
|
@ -5,7 +5,7 @@
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "In a bustling city, a chicken named Cluck",
|
||||
"content": "In a small town, a chicken named Cluck",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
@ -13,11 +13,11 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1727556016,
|
||||
"created": 1738753833,
|
||||
"id": "",
|
||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "2.4.2-dev0-native",
|
||||
"system_fingerprint": "3.1.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 50,
|
||||
|
@ -47,8 +47,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
|
||||
"total_tokens": 60,
|
||||
}
|
||||
assert (
|
||||
response.choices[0].message.content
|
||||
== "In a bustling city, a chicken named Cluck"
|
||||
response.choices[0].message.content == "In a small town, a chicken named Cluck"
|
||||
)
|
||||
assert response == response_snapshot
|
||||
|
||||
@ -84,12 +83,12 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
|
||||
]
|
||||
responses = await asyncio.gather(*futures)
|
||||
|
||||
_ = [response.choices[0].message.content for response in responses]
|
||||
generated_texts = [response.choices[0].message.content for response in responses]
|
||||
|
||||
# XXX: TODO: Fix this test.
|
||||
# assert generated_texts[0] == "In a bustling city, a chicken named Cluck"
|
||||
# assert len(generated_texts) == 4
|
||||
# assert generated_texts, all(
|
||||
# [text == generated_texts[0] for text in generated_texts]
|
||||
# )
|
||||
# assert responses == response_snapshot
|
||||
assert generated_texts[0] == "In a small town, a chicken named Cluck"
|
||||
assert len(generated_texts) == 2
|
||||
assert generated_texts, all(
|
||||
[text == generated_texts[0] for text in generated_texts]
|
||||
)
|
||||
assert responses == response_snapshot
|
||||
|
Loading…
Reference in New Issue
Block a user