mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
Updating mllama after strftime. (#2993)
* Updating mllama after strftime.
* Town instead of village.
* Forgot the integration snapshot.
* Attempt to fix intel CPU.
* Intel extension fix.
* Workaround intel.
* Moving those deps directly into pyproject.
* Revert "Moving those deps directly into pyproject."
This reverts commit 98c1496ea6.
* Non system uv.
* Fixing the docker environment hopefully.
* Missed a step.
* Move workdir up a bit.
* Bailing out of reproducible python env.
* Triton version.
This commit is contained in:
parent
856709d5c3
commit
4b8cda684b
@ -215,16 +215,9 @@ COPY server server
|
|||||||
COPY server/Makefile server/Makefile
|
COPY server/Makefile server/Makefile
|
||||||
ENV UV_SYSTEM_PYTHON=1
|
ENV UV_SYSTEM_PYTHON=1
|
||||||
RUN cd server && \
|
RUN cd server && \
|
||||||
|
make gen-server && \
|
||||||
pip install -U pip uv && \
|
pip install -U pip uv && \
|
||||||
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
|
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
|
||||||
. ./.venv/bin/activate && \
|
|
||||||
make gen-server-raw
|
|
||||||
|
|
||||||
RUN cd server && \
|
|
||||||
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
|
|
||||||
. ./.venv/bin/activate && \
|
|
||||||
pwd && \
|
|
||||||
text-generation-server --help
|
|
||||||
|
|
||||||
# Install benchmarker
|
# Install benchmarker
|
||||||
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||||
@ -238,8 +231,5 @@ ENV ATTENTION=flashdecoding-ipex
|
|||||||
ENV PREFIX_CACHING=1
|
ENV PREFIX_CACHING=1
|
||||||
ENV PREFILL_CHUNKING=1
|
ENV PREFILL_CHUNKING=1
|
||||||
ENV CUDA_GRAPHS=0
|
ENV CUDA_GRAPHS=0
|
||||||
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
|
ENTRYPOINT ["text-generation-launcher"]
|
||||||
RUN chmod +x /tgi-entrypoint.sh
|
|
||||||
|
|
||||||
ENTRYPOINT ["/tgi-entrypoint.sh"]
|
|
||||||
CMD ["--json-output"]
|
CMD ["--json-output"]
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null,
|
"logprobs": null,
|
||||||
"message": {
|
"message": {
|
||||||
"content": "In a bustling city, a chicken named Cluck",
|
"content": "In a small town, a chicken named Cluck",
|
||||||
"name": null,
|
"name": null,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"tool_calls": null
|
"tool_calls": null
|
||||||
@ -14,11 +14,11 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1727773835,
|
"created": 1738753835,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
"system_fingerprint": "3.1.1-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 10,
|
"completion_tokens": 10,
|
||||||
"prompt_tokens": 50,
|
"prompt_tokens": 50,
|
||||||
@ -32,7 +32,7 @@
|
|||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null,
|
"logprobs": null,
|
||||||
"message": {
|
"message": {
|
||||||
"content": "In a world where even chickens could dream big,",
|
"content": "In a small town, a chicken named Cluck",
|
||||||
"name": null,
|
"name": null,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"tool_calls": null
|
"tool_calls": null
|
||||||
@ -40,63 +40,11 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1727773835,
|
"created": 1738753835,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
"system_fingerprint": "3.1.1-dev0-native",
|
||||||
"usage": {
|
|
||||||
"completion_tokens": 10,
|
|
||||||
"prompt_tokens": 50,
|
|
||||||
"total_tokens": 60
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"finish_reason": "length",
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null,
|
|
||||||
"message": {
|
|
||||||
"content": "In a world where even chickens could dream big,",
|
|
||||||
"name": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": null
|
|
||||||
},
|
|
||||||
"usage": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1727773835,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
|
||||||
"object": "chat.completion",
|
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
|
||||||
"usage": {
|
|
||||||
"completion_tokens": 10,
|
|
||||||
"prompt_tokens": 50,
|
|
||||||
"total_tokens": 60
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"finish_reason": "length",
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": null,
|
|
||||||
"message": {
|
|
||||||
"content": "In a world where even chickens could dream big,",
|
|
||||||
"name": null,
|
|
||||||
"role": "assistant",
|
|
||||||
"tool_calls": null
|
|
||||||
},
|
|
||||||
"usage": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1727773835,
|
|
||||||
"id": "",
|
|
||||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
|
||||||
"object": "chat.completion",
|
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 10,
|
"completion_tokens": 10,
|
||||||
"prompt_tokens": 50,
|
"prompt_tokens": 50,
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null,
|
"logprobs": null,
|
||||||
"message": {
|
"message": {
|
||||||
"content": "In a bustling city, a chicken named Cluck",
|
"content": "In a small town, a chicken named Cluck",
|
||||||
"name": null,
|
"name": null,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"tool_calls": null
|
"tool_calls": null
|
||||||
@ -13,11 +13,11 @@
|
|||||||
"usage": null
|
"usage": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1727556016,
|
"created": 1738753833,
|
||||||
"id": "",
|
"id": "",
|
||||||
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"system_fingerprint": "2.4.2-dev0-native",
|
"system_fingerprint": "3.1.1-dev0-native",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 10,
|
"completion_tokens": 10,
|
||||||
"prompt_tokens": 50,
|
"prompt_tokens": 50,
|
||||||
|
@ -47,8 +47,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
|
|||||||
"total_tokens": 60,
|
"total_tokens": 60,
|
||||||
}
|
}
|
||||||
assert (
|
assert (
|
||||||
response.choices[0].message.content
|
response.choices[0].message.content == "In a small town, a chicken named Cluck"
|
||||||
== "In a bustling city, a chicken named Cluck"
|
|
||||||
)
|
)
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
@ -84,12 +83,12 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
|
|||||||
]
|
]
|
||||||
responses = await asyncio.gather(*futures)
|
responses = await asyncio.gather(*futures)
|
||||||
|
|
||||||
_ = [response.choices[0].message.content for response in responses]
|
generated_texts = [response.choices[0].message.content for response in responses]
|
||||||
|
|
||||||
# XXX: TODO: Fix this test.
|
# XXX: TODO: Fix this test.
|
||||||
# assert generated_texts[0] == "In a bustling city, a chicken named Cluck"
|
assert generated_texts[0] == "In a small town, a chicken named Cluck"
|
||||||
# assert len(generated_texts) == 4
|
assert len(generated_texts) == 2
|
||||||
# assert generated_texts, all(
|
assert generated_texts, all(
|
||||||
# [text == generated_texts[0] for text in generated_texts]
|
[text == generated_texts[0] for text in generated_texts]
|
||||||
# )
|
)
|
||||||
# assert responses == response_snapshot
|
assert responses == response_snapshot
|
||||||
|
Loading…
Reference in New Issue
Block a user