revert build

2025-09-09 19:34:53 +00:00 · 2023-04-11 11:08:20 +02:00 · 2023-04-11 11:08:20 +02:00 · c2beaa279e
commit c2beaa279e
parent a1a6b5cc20
2 changed files with 9 additions and 4 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@ -62,16 +62,23 @@ jobs:
        id: meta
        uses: docker/metadata-action@v4.3.0
        with:
          flavor: |
            latest=auto
          images: |
            ghcr.io/huggingface/text-generation-inference
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
            db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
          tags: |
-            type=raw,value=llama-sha-${{ env.GITHUB_SHA_SHORT }}
+            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
      - name: Build and push Docker image
        uses: docker/build-push-action@v2
        with:
          context: .
          file: Dockerfile
-          push: true
+          push: ${{ github.event_name != 'pull_request' }}
          platforms: 'linux/amd64'
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@ -233,8 +233,6 @@ class PositionRotaryEmbedding(RotaryEmbedding):
        ):
            self._seq_len_cached = seqlen
            t = torch.arange(seqlen, device=device, dtype=self.inv_freq.dtype)
            # Don't do einsum, it converts fp32 to fp16
            # freqs = torch.einsum("i,j->ij", t, self.inv_freq)
            freqs = torch.outer(t, self.inv_freq.to(device=t.device))
            self._cos_cached = torch.cos(freqs).to(dtype)
            self._sin_cached = torch.sin(freqs).to(dtype)