# syntax=docker/dockerfile:1
# Rust builder
#
# Chef stage: common base with cargo-chef preinstalled, tagged for the
# Rust 1.78 toolchain so dependency cooking is reproducible across builds.
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef

WORKDIR /usr/src
|
|
|
|
|
|
|
|
# Planner stage: copy only the workspace manifests and sources needed to
# compute the cargo-chef dependency recipe. The builder stage consumes
# recipe.json so compiled dependencies are cached independently of source edits.
FROM chef AS planner

COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher

RUN cargo chef prepare --recipe-path recipe.json
|
|
|
|
|
|
|
|
# Builder stage: needs protoc for the prost/tonic build scripts that compile
# the files under proto/.
FROM chef AS builder

# Install a pinned protoc release (v21.12). -f makes curl fail on an HTTP
# error instead of saving the error page as the zip; the archive is removed
# in the same layer so it never persists in the image.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -fOL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP
|
2022-10-14 13:56:21 +00:00
|
|
|
|
2023-03-03 14:07:27 +00:00
|
|
|
# Cook (pre-build) all workspace dependencies from the planner's recipe.
# This layer stays cached until the recipe or lockfile changes.
COPY --from=planner /usr/src/recipe.json recipe.json
COPY Cargo.lock Cargo.lock
RUN cargo chef cook --release --recipe-path recipe.json
|
2022-10-14 13:56:21 +00:00
|
|
|
|
2023-03-03 14:07:27 +00:00
|
|
|
# Copy the real sources and build the workspace binaries. The cooked
# dependency layer above is reused across source-only changes.
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher
RUN cargo build --release
|
2022-10-18 13:19:03 +00:00
|
|
|
|
2023-04-16 22:26:47 +00:00
|
|
|
# Text Generation Inference base image
# NOTE(review): the Habana image is version-pinned via its path (1.17.0 /
# pytorch 2.3.1) but still uses the :latest tag — consider pinning the tag
# or a digest for fully reproducible builds.
FROM vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest AS base

# Text Generation Inference base env
ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80
|
2022-10-14 13:56:21 +00:00
|
|
|
|
2023-12-05 10:12:16 +00:00
|
|
|
# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it.
# The downloaded .deb is deleted in the same RUN so it does not persist in
# the layer (removal in a later layer would not shrink the image).
# NOTE(review): fetched over plain http from a mirror — consider https and a
# checksum check for supply-chain safety.
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
    dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
    rm -f ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb
|
|
|
|
|
2023-04-14 17:30:30 +00:00
|
|
|
WORKDIR /usr/src

# OS build/runtime deps. DEBIAN_FRONTEND is set inline (not via ENV) so it
# does not leak into the runtime environment; apt lists are removed in the
# same layer; packages are sorted alphabetically for diffability.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        libssl-dev \
        make \
    && rm -rf /var/lib/apt/lists/*
|
2022-10-14 13:56:21 +00:00
|
|
|
|
|
|
|
# Install server
COPY proto proto
COPY server server
# NOTE(review): server/Makefile is already included by `COPY server server`
# above — presumably kept for cache-granularity reasons; confirm and drop if not.
COPY server/Makefile server/Makefile
# gen-server regenerates the gRPC stubs from proto/ before installing.
# --no-cache-dir on every pip invocation keeps pip's download cache out of
# the image layer (hadolint DL3042); previously only the final install had it.
RUN cd server && \
    make gen-server && \
    pip install --no-cache-dir -r requirements.txt && \
    bash ./dill-0.3.8-patch.sh && \
    pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 && \
    pip install . --no-cache-dir
|
2022-10-14 13:56:21 +00:00
|
|
|
|
2023-05-09 11:19:31 +00:00
|
|
|
# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
|
2022-10-14 13:56:21 +00:00
|
|
|
|
2023-04-14 08:12:21 +00:00
|
|
|
# Final image
FROM base

# NOTE(review): no USER directive — the container runs as root. Confirm
# whether the Habana runtime requires root; otherwise add a non-root user.
# Exec-form ENTRYPOINT/CMD: launcher is PID 1 and receives SIGTERM directly;
# CMD supplies default arguments that `docker run` can override.
ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"]
|