From 457e72c386611c4cd6c0bacdd5545a221ace9dcb Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Thu, 16 Nov 2023 13:54:58 +0100
Subject: [PATCH 1/3] v1.1.1

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a91caaba..4fcef3dc 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ For a detailed starting guide, please see the [Quick Tour](https://huggingface.c
 model=tiiuae/falcon-7b-instruct
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.1 --model-id $model
 ```
 
 And then you can make requests like
@@ -104,7 +104,7 @@ model=meta-llama/Llama-2-7b-chat-hf
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 token=<your cli READ token>
 
-docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model
+docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.1 --model-id $model
 ```
 
 ### A note on Shared Memory (shm)

From 8acdc1fae79053ae08f7cf809e1d6331f3a6a8c8 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Thu, 16 Nov 2023 18:35:09 +0100
Subject: [PATCH 2/3] hotfix 1.1.1

---
 .github/workflows/build.yaml | 38 ------------------------------------
 1 file changed, 38 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 124e6a33..11a95f4b 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -79,11 +79,6 @@ jobs:
           install: true
       - name: Inject slug/short variables
         uses: rlespinasse/github-slug-action@v4.4.1
-      - name: Install cosign
-        if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0
-        with:
-          cosign-release: 'v1.13.1'
       - name: Tailscale
         uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
         with:
@@ -150,39 +145,6 @@ jobs:
           labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
           cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
-      # Sign the resulting Docker image digest except on PRs.
-      # This will only write to the public Rekor transparency log when the Docker
-      # repository is public to avoid leaking data.
-      - name: Sign the published Docker image
-        if: ${{ github.event_name != 'pull_request' }}
-        env:
-          COSIGN_EXPERIMENTAL: "true"
-        # This step uses the identity token to provision an ephemeral certificate
-        # against the sigstore community Fulcio instance.
-        run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign {}@${{ steps.build-and-push.outputs.digest }}
-      - name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph
-        uses: aquasecurity/trivy-action@master
-        if: ${{ github.event_name != 'pull_request' }}
-        with:
-          image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}'
-          format: 'github'
-          output: 'dependency-results.sbom.json'
-          github-pat: ${{ secrets.GITHUB_TOKEN }}
-          scanners: 'vuln'
-      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@master
-        if: ${{ github.event_name != 'pull_request' }}
-        with:
-          image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}'
-          format: 'sarif'
-          output: 'trivy-results.sarif'
-          severity: 'CRITICAL'
-          scanners: 'vuln'
-      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
-        if: ${{ github.event_name != 'pull_request' }}
-        with:
-          sarif_file: 'trivy-results.sarif'
 
   integration-tests:
     concurrency:

From 3dbc649b11d14955a3d3448ed6db373a0563dfe9 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <olivier@huggingface.co>
Date: Mon, 20 Nov 2023 10:33:44 +0100
Subject: [PATCH 3/3] fix: do not leak inputs on error (#1228)

Close #1225
---
 router/src/infer.rs      | 6 +++---
 router/src/validation.rs | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/router/src/infer.rs b/router/src/infer.rs
index cc34c466..aa6dc664 100644
--- a/router/src/infer.rs
+++ b/router/src/infer.rs
@@ -82,7 +82,7 @@ impl Infer {
     }
 
     /// Add a new request to the queue and return a stream of InferStreamResponse
-    #[instrument(skip(self))]
+    #[instrument(skip_all)]
     pub(crate) async fn generate_stream(
         &self,
         request: GenerateRequest,
@@ -133,7 +133,7 @@ impl Infer {
     }
 
     /// Add a new request to the queue and return a InferResponse
-    #[instrument(skip(self))]
+    #[instrument(skip_all)]
     pub(crate) async fn generate(
         &self,
         request: GenerateRequest,
@@ -214,7 +214,7 @@ impl Infer {
     }
     /// Add best_of new requests to the queue and return a InferResponse of the sequence with
     /// the highest log probability per token
-    #[instrument(skip(self))]
+    #[instrument(skip(self, request))]
     pub(crate) async fn generate_best_of(
         &self,
         request: GenerateRequest,
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 7a84640d..1b47fc97 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -69,7 +69,7 @@ impl Validation {
         }
     }
 
-    #[instrument(skip_all)]
+    #[instrument(skip(self, inputs))]
     async fn validate_input(
         &self,
         inputs: String,