From 457e72c386611c4cd6c0bacdd5545a221ace9dcb Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Thu, 16 Nov 2023 13:54:58 +0100 Subject: [PATCH 1/3] v1.1.1 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a91caaba..4fcef3dc 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ For a detailed starting guide, please see the [Quick Tour](https://huggingface.c model=tiiuae/falcon-7b-instruct volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run -docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model +docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.1 --model-id $model ``` And then you can make requests like @@ -104,7 +104,7 @@ model=meta-llama/Llama-2-7b-chat-hf volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run token= -docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model +docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.1 --model-id $model ``` ### A note on Shared Memory (shm) From 8acdc1fae79053ae08f7cf809e1d6331f3a6a8c8 Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Thu, 16 Nov 2023 18:35:09 +0100 Subject: [PATCH 2/3] hotfix 1.1.1 --- .github/workflows/build.yaml | 38 ------------------------------------ 1 file changed, 38 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 124e6a33..11a95f4b 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -79,11 +79,6 @@ jobs: install: true - name: Inject slug/short variables uses: rlespinasse/github-slug-action@v4.4.1 - - name: Install cosign - if: github.event_name != 'pull_request' - uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0 - with: - cosign-release: 'v1.13.1' - name: Tailscale uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966 with: @@ -150,39 +145,6 @@ jobs: labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }} cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min - # Sign the resulting Docker image digest except on PRs. - # This will only write to the public Rekor transparency log when the Docker - # repository is public to avoid leaking data. - - name: Sign the published Docker image - if: ${{ github.event_name != 'pull_request' }} - env: - COSIGN_EXPERIMENTAL: "true" - # This step uses the identity token to provision an ephemeral certificate - # against the sigstore community Fulcio instance. - run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign {}@${{ steps.build-and-push.outputs.digest }} - - name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph - uses: aquasecurity/trivy-action@master - if: ${{ github.event_name != 'pull_request' }} - with: - image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}' - format: 'github' - output: 'dependency-results.sbom.json' - github-pat: ${{ secrets.GITHUB_TOKEN }} - scanners: 'vuln' - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master - if: ${{ github.event_name != 'pull_request' }} - with: - image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}' - format: 'sarif' - output: 'trivy-results.sarif' - severity: 'CRITICAL' - scanners: 'vuln' - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 - if: ${{ github.event_name != 'pull_request' }} - with: - sarif_file: 'trivy-results.sarif' integration-tests: concurrency: From 3dbc649b11d14955a3d3448ed6db373a0563dfe9 Mon Sep 17 00:00:00 2001 From: OlivierDehaene Date: Mon, 20 Nov 2023 10:33:44 +0100 Subject: [PATCH 3/3] fix: do not leak inputs on error (#1228) Close #1225 --- router/src/infer.rs | 6 +++--- router/src/validation.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/router/src/infer.rs b/router/src/infer.rs index cc34c466..aa6dc664 100644 --- a/router/src/infer.rs +++ b/router/src/infer.rs @@ -82,7 +82,7 @@ impl Infer { } /// Add a new request to the queue and return a stream of InferStreamResponse - #[instrument(skip(self))] + #[instrument(skip_all)] pub(crate) async fn generate_stream( &self, request: GenerateRequest, @@ -133,7 +133,7 @@ impl Infer { } /// Add a new request to the queue and return a InferResponse - #[instrument(skip(self))] + #[instrument(skip_all)] pub(crate) async fn generate( &self, request: GenerateRequest, @@ -214,7 +214,7 @@ impl Infer { } /// Add best_of new requests to the queue and return a InferResponse of the sequence with /// the highest log probability per token - #[instrument(skip(self))] + #[instrument(skip(self, request))] pub(crate) async fn generate_best_of( &self, request: GenerateRequest, diff --git a/router/src/validation.rs b/router/src/validation.rs index 7a84640d..1b47fc97 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -69,7 +69,7 @@ impl Validation { } } - #[instrument(skip_all)] + #[instrument(skip(self, inputs))] async fn validate_input( &self, inputs: String,