mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
fix imports
parent 68990a5635
commit 5b6367f87c
.github/workflows/build.yaml (vendored) · 78 changed lines
@@ -146,11 +146,50 @@ jobs:
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
           cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
 
+  integration-tests:
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
+      cancel-in-progress: true
+    needs:
+      - start-runner
+      - build-and-push-image # Wait for the docker image to be built
+    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
+    env:
+      DOCKER_VOLUME: /cache
+    steps:
+      - uses: actions/checkout@v2
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4.4.1
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+      - name: Tailscale
+        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
+        with:
+          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
+      - name: Prepare disks
+        run: |
+          sudo mkfs -t ext4 /dev/nvme1n1
+          sudo mkdir ${{ env.DOCKER_VOLUME }}
+          sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
+      - name: Install
+        run: |
+          make install-integration-tests
+      - name: Run tests
+        run: |
+          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
+          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          pytest -s -vv integration-tests
+
   build-and-push-image-rocm:
     concurrency:
       group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
       cancel-in-progress: true
-    needs: start-runner # required to start the main job when the runner is ready
+    needs:
+      - start-runner
+      - build-and-push-image # Wait for the main docker image to be built
+      - integration-tests # Wait for the main integration-tests
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
     permissions:
       contents: write
@@ -235,43 +274,6 @@ jobs:
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
           cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
 
-  integration-tests:
-    concurrency:
-      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
-      cancel-in-progress: true
-    needs:
-      - start-runner
-      - build-and-push-image # Wait for the docker image to be built
-      - build-and-push-image-rocm
-    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
-    env:
-      DOCKER_VOLUME: /cache
-    steps:
-      - uses: actions/checkout@v2
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4.4.1
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: 3.9
-      - name: Tailscale
-        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
-        with:
-          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
-      - name: Prepare disks
-        run: |
-          sudo mkfs -t ext4 /dev/nvme1n1
-          sudo mkdir ${{ env.DOCKER_VOLUME }}
-          sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
-      - name: Install
-        run: |
-          make install-integration-tests
-      - name: Run tests
-        run: |
-          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
-          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-          pytest -s -vv integration-tests
-
   stop-runner:
     name: Stop self-hosted EC2 runner
     needs:
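Net effect of the build.yaml change: the integration tests now run between the CUDA image build and the ROCm image build instead of after both, so the dependency chain becomes start-runner, then build-and-push-image, then integration-tests, then build-and-push-image-rocm, then stop-runner. Below is a minimal standalone sketch (not part of the commit) that checks this ordering with Python's stdlib graphlib, available in the 3.9 interpreter the workflow installs; the stop-runner edge is simplified to a single dependency for illustration.

from graphlib import TopologicalSorter  # stdlib since Python 3.9

# Job -> set of jobs it `needs`, copied from the new workflow graph.
needs = {
    "build-and-push-image": {"start-runner"},
    "integration-tests": {"start-runner", "build-and-push-image"},
    "build-and-push-image-rocm": {
        "start-runner",
        "build-and-push-image",
        "integration-tests",
    },
    # Simplified: the real stop-runner job depends on more jobs and is
    # typically guarded so the EC2 runner is reclaimed even on failure.
    "stop-runner": {"build-and-push-image-rocm"},
}

# At every step exactly one job is ready, so the order is fully determined.
print(list(TopologicalSorter(needs).static_order()))
# ['start-runner', 'build-and-push-image', 'integration-tests',
#  'build-and-push-image-rocm', 'stop-runner']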
server/text_generation_server/models/__init__.py

@@ -5,7 +5,6 @@ from transformers.configuration_utils import PretrainedConfig
 from transformers.models.auto import modeling_auto
 from typing import Optional
 
-from text_generation_server.utils.flash_attn import HAS_FLASH_ATTN_V2_CUDA
 from text_generation_server.utils.speculate import get_speculate, set_speculate
 from text_generation_server.models.model import Model
 from text_generation_server.models.causal_lm import CausalLM
@@ -58,10 +57,12 @@ try:
     from text_generation_server.models.idefics import IDEFICSSharded
     from text_generation_server.models.flash_mistral import FlashMistral
     from text_generation_server.models.flash_mixtral import FlashMixtral
+    from text_generation_server.utils.flash_attn import HAS_FLASH_ATTN_V2_CUDA
 
 except ImportError as e:
     logger.warning(f"Could not import Flash Attention enabled models: {e}")
     FLASH_ATTENTION = False
+    HAS_FLASH_ATTN_V2_CUDA = False
 
 if FLASH_ATTENTION:
     __all__.append(FlashNeoXSharded)
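Why the import moves: at module top (first hunk of this file), from text_generation_server.utils.flash_attn import HAS_FLASH_ATTN_V2_CUDA ran unconditionally, so a host without the flash-attn CUDA kernels failed at import time. Inside the existing try block, the failure is caught together with the other Flash Attention imports, and the new line in the except clause gives the flag a safe False default. A minimal standalone sketch of the pattern, with print standing in for the module's logger.warning and the FLASH_ATTENTION assignment simplified relative to the real module:

# Guarded optional import: CUDA-only symbols are imported inside try/except
# so machines without flash-attn degrade gracefully instead of crashing.
try:
    from text_generation_server.utils.flash_attn import HAS_FLASH_ATTN_V2_CUDA

    FLASH_ATTENTION = True  # simplified; the real module sets this elsewhere
except ImportError as e:
    # ModuleNotFoundError is a subclass of ImportError, so a missing
    # package lands here as well.
    print(f"Could not import Flash Attention enabled models: {e}")
    FLASH_ATTENTION = False
    HAS_FLASH_ATTN_V2_CUDA = False  # the fallback this commit adds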