From bd1bdebb47472f0947171c5c2e36b3daa593d2e7 Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Wed, 18 Jun 2025 12:35:36 +0200 Subject: [PATCH] doc: fix README (#3271) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3c385b83..b2e818a1 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ model=HuggingFaceH4/zephyr-7b-beta volume=$PWD/data docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \ - ghcr.io/huggingface/text-generation-inference:3.3.2 --model-id $model + ghcr.io/huggingface/text-generation-inference:3.3.3 --model-id $model ``` And then you can make requests like @@ -152,7 +152,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading token= docker run --gpus all --shm-size 1g -e HF_TOKEN=$token -p 8080:80 -v $volume:/data \ - ghcr.io/huggingface/text-generation-inference:3.3.2 --model-id $model + ghcr.io/huggingface/text-generation-inference:3.3.3 --model-id $model ``` ### A note on Shared Memory (shm)