From f428f5fc8acd31b48512967dd3534bae56ced855 Mon Sep 17 00:00:00 2001
From: Thanaji
Date: Thu, 31 Oct 2024 23:54:34 +0200
Subject: [PATCH] updated release version to 2.0.6

---
 Cargo.lock            | 10 +++++-----
 Cargo.toml            |  2 +-
 README.md             | 28 ++++++++++++++--------------
 server/pyproject.toml |  2 +-
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2e75fe8f..1a57a383 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1528,7 +1528,7 @@ dependencies = [

 [[package]]
 name = "indoc"
-version = "2.0.5"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"

@@ -3552,7 +3552,7 @@ dependencies = [

 [[package]]
 name = "text-generation-benchmark"
-version = "2.0.4"
+version = "2.0.6"
 dependencies = [
  "average",
  "clap",
@@ -3573,7 +3573,7 @@ dependencies = [

 [[package]]
 name = "text-generation-client"
-version = "2.0.4"
+version = "2.0.6"
 dependencies = [
  "futures",
  "grpc-metadata",
@@ -3590,7 +3590,7 @@ dependencies = [

 [[package]]
 name = "text-generation-launcher"
-version = "2.0.4"
+version = "2.0.6"
 dependencies = [
  "clap",
  "ctrlc",
@@ -3609,7 +3609,7 @@ dependencies = [

 [[package]]
 name = "text-generation-router"
-version = "2.0.4"
+version = "2.0.6"
 dependencies = [
  "async-stream",
  "axum",
diff --git a/Cargo.toml b/Cargo.toml
index aafc8435..d2fd5b44 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "2.0.4"
+version = "2.0.6"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
diff --git a/README.md b/README.md
index 7f418d17..fc5be00d 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene
 1. Pull the official Docker image with:

    ```bash
-   docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+   docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
    ```
 > [!NOTE]
 > Alternatively, you can build the Docker image using the `Dockerfile` located in this folder with:
@@ -83,7 +83,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene
    -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN=$hf_token \
    -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true \
    -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model --max-input-tokens 1024 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model --max-input-tokens 1024 \
    --max-total-tokens 2048
    ```
@@ -97,7 +97,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene
    -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
    -e HF_TOKEN=$hf_token -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true \
    -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice \
-   --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model --sharded true \
+   --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model --sharded true \
    --num-shard 8 --max-input-tokens 1024 --max-total-tokens 2048
    ```
 3. Wait for the TGI-Gaudi server to come online. You will see something like so:
@@ -140,7 +140,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --max-input-length 1024 --max-total-tokens 2048 \
    --max-batch-prefill-tokens 2048 --max-batch-total-tokens 65536 \
@@ -172,7 +172,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --sharded true --num-shard 8 \
    --max-input-length 1024 --max-total-tokens 2048 \
@@ -204,7 +204,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --max-input-length 1024 --max-total-tokens 2048 \
    --max-batch-prefill-tokens 2048 --max-batch-total-tokens 65536 \
@@ -236,7 +236,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --sharded true --num-shard 8 \
    --max-input-length 1024 --max-total-tokens 2048 \
@@ -268,7 +268,7 @@ docker run -p 8080:80 \
    -e BATCH_BUCKET_SIZE=1 \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --max-input-tokens 4096 --max-batch-prefill-tokens 16384 \
    --max-total-tokens 8192 --max-batch-total-tokens 32768
@@ -319,7 +319,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --max-input-length 1024 --max-total-tokens 2048 \
    --max-batch-prefill-tokens 2048 --max-batch-total-tokens 65536 \
@@ -354,7 +354,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --sharded true --num-shard 8 \
    --max-input-length 1024 --max-total-tokens 2048 \
@@ -390,7 +390,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --max-input-length 1024 --max-total-tokens 2048 \
    --max-batch-prefill-tokens 2048 --max-batch-total-tokens 65536 \
@@ -425,7 +425,7 @@ docker run -p 8080:80 \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --sharded true --num-shard 8 \
    --max-input-length 1024 --max-total-tokens 2048 \
@@ -458,7 +458,7 @@ docker run -p 8080:80 \
    -e BATCH_BUCKET_SIZE=1 \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --max-input-tokens 4096 --max-batch-prefill-tokens 16384 \
    --max-total-tokens 8192 --max-batch-total-tokens 32768
@@ -489,7 +489,7 @@ docker run -p 8080:80 \
    -e BATCH_BUCKET_SIZE=1 \
    --cap-add=sys_nice \
    --ipc=host \
-   ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+   ghcr.io/huggingface/tgi-gaudi:2.0.6 \
    --model-id $model \
    --sharded true --num-shard 8 \
    --max-input-tokens 4096 --max-batch-prefill-tokens 16384 \
diff --git a/server/pyproject.toml b/server/pyproject.toml
index 46a51311..38344ddd 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-server"
-version = "2.0.4"
+version = "2.0.6"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene"]
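Once a container built from the bumped `ghcr.io/huggingface/tgi-gaudi:2.0.6` image is up (using any of the `docker run` commands touched above), a quick smoke test is to call the standard TGI `/generate` endpoint. This is a minimal sketch, not part of the patch: it assumes the `-p 8080:80` port mapping used in the README examples and the usual text-generation-inference JSON request schema.

```bash
# Send one short generation request to the locally mapped TGI port.
# Adjust host/port if the container was started with a different -p mapping.
curl -s http://127.0.0.1:8080/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 32}}'
```

The server's `/info` endpoint (`curl -s http://127.0.0.1:8080/info`) should also report the updated version string, which is a convenient check that the release bump landed in the running image.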