push image to test

This commit is contained in:
OlivierDehaene 2023-04-24 16:08:27 +02:00
parent c69f24d16b
commit 885411e747
3 changed files with 16 additions and 14 deletions

View File

@@ -80,8 +80,8 @@ jobs:
latest=auto
images: |
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
ghcr.io/huggingface/text-generation-inference
db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
# ghcr.io/huggingface/text-generation-inference
# db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
@@ -93,7 +93,8 @@ jobs:
with:
context: .
file: Dockerfile
push: ${{ github.event_name != 'pull_request' }}
# push: ${{ github.event_name != 'pull_request' }}
push: true
platforms: 'linux/amd64'
build-args: |
GIT_SHA=${{ env.GITHUB_SHA }}

View File

@@ -337,6 +337,10 @@ class CausalLMBatch(Batch):
layer[k] = t.view(len(batch), -1, *t.shape[-2:])
start_index = end_index
# Add eventual padding tokens that were added while concatenating
max_tokens += batch.max_tokens + (
max_input_length - batch.max_input_length
) * len(batch)
first_past_kvs = batches[0].past_key_values
_, num_heads, padded_sequence_length, head_dim = first_past_kvs[0][1].shape
@@ -404,10 +404,6 @@ class CausalLMBatch(Batch):
# Update values
start_index = end_index
# Add eventual padding tokens that were added while concatenating
max_tokens += batch.max_tokens + (
max_input_length - batch.max_input_length
) * len(batch)
past_key_values.append([padded_past_keys, padded_past_values])

View File

@@ -390,6 +390,13 @@ class Seq2SeqLMBatch(Batch):
batch.past_key_values = [[t for t in layer] for layer in batch.past_key_values]
start_index = end_index
# Add eventual padding tokens that were added while concatenating
max_tokens += batch.max_tokens + (
max_input_length
- batch.max_input_length
+ max_decoder_input_length
- batch.max_decoder_input_length
) * len(batch)
# Determine shapes for new past kv tensors
first_past_kvs = batches[0].past_key_values
@@ -455,13 +462,7 @@ class Seq2SeqLMBatch(Batch):
del t
start_index = end_index
# Add eventual padding tokens that were added while concatenating
max_tokens += batch.max_tokens + (
max_input_length
- batch.max_input_length
+ max_decoder_input_length
- batch.max_decoder_input_length
) * len(batch)
return cls(
batch_id=batches[0].batch_id,