From 82a7f9eb530ed9e7156512259c1f6dcbe023e260 Mon Sep 17 00:00:00 2001
From: bealbrown
Date: Wed, 12 Jul 2023 23:12:05 -0400
Subject: [PATCH] Convert example docker command to use :latest rather than being pegged to 0.9

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d31c176b..1f19b1e6 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ model=bigscience/bloom-560m
 num_shard=2
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:0.9 --model-id $model --num-shard $num_shard
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference --model-id $model --num-shard $num_shard
 ```
 
 **Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.