From 253a9924473129412bbf0775d43b3c4ddb76f48c Mon Sep 17 00:00:00 2001 From: yuanwu Date: Mon, 2 Dec 2024 08:45:36 +0000 Subject: [PATCH] Remove the CI workflows we don't currently support Signed-off-by: yuanwu --- .github/workflows/autodocs.yaml | 45 ----- .github/workflows/build.yaml | 191 ------------------ .github/workflows/build_documentation.yaml | 20 -- .github/workflows/build_pr_documentation.yaml | 19 -- .github/workflows/ci_build.yaml | 49 ----- .github/workflows/client-tests.yaml | 26 --- .github/workflows/integration_tests.yaml | 41 ---- .github/workflows/load_test.yaml | 43 ---- .github/workflows/nix_tests.yaml | 41 ---- .github/workflows/push_docker_image.yml | 56 +++++ .github/workflows/stale.yaml | 14 -- .github/workflows/tests.yaml | 63 ------ .github/workflows/trufflehog.yaml | 18 -- .../workflows/upload_pr_documentation.yaml | 16 -- Makefile | 3 - 15 files changed, 56 insertions(+), 589 deletions(-) delete mode 100644 .github/workflows/autodocs.yaml delete mode 100644 .github/workflows/build.yaml delete mode 100644 .github/workflows/build_documentation.yaml delete mode 100644 .github/workflows/build_pr_documentation.yaml delete mode 100644 .github/workflows/ci_build.yaml delete mode 100644 .github/workflows/client-tests.yaml delete mode 100644 .github/workflows/integration_tests.yaml delete mode 100644 .github/workflows/load_test.yaml delete mode 100644 .github/workflows/nix_tests.yaml create mode 100644 .github/workflows/push_docker_image.yml delete mode 100644 .github/workflows/stale.yaml delete mode 100644 .github/workflows/tests.yaml delete mode 100644 .github/workflows/trufflehog.yaml delete mode 100644 .github/workflows/upload_pr_documentation.yaml diff --git a/.github/workflows/autodocs.yaml b/.github/workflows/autodocs.yaml deleted file mode 100644 index a768f263..00000000 --- a/.github/workflows/autodocs.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: Automatic Documentation for Launcher - -on: - pull_request: - -jobs: - update_docs: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Set up Rust - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - - - name: Install Protocol Buffers compiler - run: | - sudo apt-get update - sudo apt-get install -y protobuf-compiler libprotobuf-dev - - - name: Install Launcher - id: install-launcher - run: cargo install --path launcher/ - - - name: Install router - id: install-router - run: cargo install --path backends/v3/ - - - uses: actions/setup-node@v4 - with: - node-version: 22 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - name: Check that documentation is up-to-date - run: | - npm install -g @redocly/cli - python update_doc.py --check diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml deleted file mode 100644 index ce1cdc33..00000000 --- a/.github/workflows/build.yaml +++ /dev/null @@ -1,191 +0,0 @@ -name: Build and push docker image to internal registry - -on: - workflow_call: - inputs: - hardware: - type: string - description: Hardware - # options: - # - cuda - # - rocm - # - intel - required: true - release-tests: - description: "Run release integration tests" - required: true - default: false - type: boolean - -jobs: - build-and-push: - outputs: - docker_image: ${{ steps.final.outputs.docker_image }} - docker_devices: ${{ steps.final.outputs.docker_devices }} - runs_on: ${{ steps.final.outputs.runs_on }} - label: ${{ steps.final.outputs.label }} - concurrency: - group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: - group: aws-highmemory-32-plus-priv - permissions: - contents: write - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - name: Construct harware variables - shell: bash - run: | - case ${{ inputs.hardware }} in - cuda) - export dockerfile="Dockerfile" - export label_extension="" - export docker_devices="" - export runs_on="aws-g6-12xl-plus-priv-cache" - export platform="" - ;; - rocm) - export dockerfile="Dockerfile_amd" - export label_extension="-rocm" - export docker_devices="/dev/kfd,/dev/dri" - # TODO Re-enable when they pass. - # export runs_on="amd-gpu-tgi" - export runs_on="ubuntu-latest" - export platform="" - ;; - intel-xpu) - export dockerfile="Dockerfile_intel" - export label_extension="-intel-xpu" - export docker_devices="" - export runs_on="ubuntu-latest" - export platform="xpu" - ;; - intel-cpu) - export dockerfile="Dockerfile_intel" - export label_extension="-intel-cpu" - export docker_devices="" - export runs_on="ubuntu-latest" - export platform="cpu" - ;; - esac - echo $dockerfile - echo "Dockerfile=${dockerfile}" - echo $label_extension - echo $docker_devices - echo $runs_on - echo $platform - echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV - echo "LABEL=${label_extension}" >> $GITHUB_ENV - echo "PLATFORM=${platform}" >> $GITHUB_ENV - echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV - echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV - echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v3 - with: - install: true - buildkitd-config: /tmp/buildkitd.toml - - name: Login to internal Container Registry - uses: docker/login-action@v3 - with: - username: ${{ secrets.REGISTRY_USERNAME }} - password: ${{ secrets.REGISTRY_PASSWORD }} - registry: registry.internal.huggingface.tech - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Login to Azure Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - username: ${{ secrets.AZURE_DOCKER_USERNAME }} - password: ${{ secrets.AZURE_DOCKER_PASSWORD }} - registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io - # If pull request - - name: Extract metadata (tags, labels) for Docker - if: ${{ github.event_name == 'pull_request' }} - id: meta-pr - uses: docker/metadata-action@v5 - with: - images: | - registry.internal.huggingface.tech/api-inference/community/text-generation-inference - tags: | - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }} - # If main, release or tag - - name: Extract metadata (tags, labels) for Docker - if: ${{ github.event_name != 'pull_request' }} - id: meta - uses: docker/metadata-action@v4.3.0 - with: - flavor: | - latest=auto - images: | - registry.internal.huggingface.tech/api-inference/community/text-generation-inference - ghcr.io/huggingface/text-generation-inference - db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference - tags: | - type=semver,pattern={{version}}${{ env.LABEL }} - type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }} - type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }} - - name: Build and push Docker image - id: build-and-push - uses: docker/build-push-action@v4 - with: - context: . - file: ${{ env.DOCKERFILE }} - push: true - platforms: 'linux/amd64' - build-args: | - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }} - PLATFORM=${{ env.PLATFORM }} - tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }} - labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min - - name: Final - id: final - run: | - echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT" - echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT" - echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT" - echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT" - integration_tests: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - needs: build-and-push - runs-on: - group: ${{ needs.build-and-push.outputs.runs_on }} - if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest' - env: - PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }} - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - name: Install - run: | - make install-integration-tests - - name: Run tests - run: | - export DOCKER_VOLUME=/mnt/cache - export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }} - export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }} - export HF_TOKEN=${{ secrets.HF_TOKEN }} - echo $DOCKER_IMAGE - pytest -s -vv integration-tests ${PYTEST_FLAGS} diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml deleted file mode 100644 index 4d0b19a3..00000000 --- a/.github/workflows/build_documentation.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: Build documentation - -on: - push: - paths: - - "docs/source/**" - branches: - - main - - doc-builder* - - v*-release - -jobs: - build: - uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main - with: - commit_sha: ${{ github.sha }} - package: text-generation-inference - additional_args: --not_python_module - secrets: - hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} diff --git a/.github/workflows/build_pr_documentation.yaml b/.github/workflows/build_pr_documentation.yaml deleted file mode 100644 index a5ce39a5..00000000 --- a/.github/workflows/build_pr_documentation.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: Build PR Documentation - -on: - pull_request: - paths: - - "docs/source/**" - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - build: - uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main - with: - commit_sha: ${{ github.event.pull_request.head.sha }} - pr_number: ${{ github.event.number }} - package: text-generation-inference - additional_args: --not_python_module diff --git a/.github/workflows/ci_build.yaml b/.github/workflows/ci_build.yaml deleted file mode 100644 index 5190f321..00000000 --- a/.github/workflows/ci_build.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: CI build - -on: - push: - branches: - - 'main' - tags: - - 'v*' - pull_request: - paths: - - ".github/workflows/build.yaml" - - "integration-tests/**" - - "backends/**" - - "server/**" - - "proto/**" - - "router/**" - - "launcher/**" - - "Cargo.lock" - - "rust-toolchain.toml" - - "Dockerfile" - - "Dockerfile_amd" - - "Dockerfile_intel" - branches: - - "main" - workflow_dispatch: - inputs: - release-tests: - description: "Run release integration tests" - required: true - default: false - type: boolean - -jobs: - build: - strategy: - # super important if you want to see all results, even if one fails - # fail-fast is true by default - fail-fast: false - matrix: - hardware: ["cuda", "rocm", "intel-xpu", "intel-cpu"] - uses: ./.github/workflows/build.yaml # calls the one above ^ - permissions: - contents: write - packages: write - with: - hardware: ${{ matrix.hardware }} - # https://github.com/actions/runner/issues/2206 - release-tests: ${{ inputs.release-tests == true }} - secrets: inherit diff --git a/.github/workflows/client-tests.yaml b/.github/workflows/client-tests.yaml deleted file mode 100644 index ff2928c4..00000000 --- a/.github/workflows/client-tests.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Python Client Tests - -on: - pull_request: - paths: - - ".github/workflows/client-tests.yaml" - - "clients/python/**" - -jobs: - run_tests: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: 3.9 - - name: Install - run: | - cd clients/python && pip install . - - name: Run tests - run: | - pip install pytest pytest-asyncio - export HF_TOKEN=${{ secrets.HF_TOKEN }} - make python-client-tests diff --git a/.github/workflows/integration_tests.yaml b/.github/workflows/integration_tests.yaml deleted file mode 100644 index 59a8d304..00000000 --- a/.github/workflows/integration_tests.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: Integration tests - -on: - workflow_call: - inputs: - docker_image: - type: string - description: Hardware - required: true - docker_devices: - type: string - description: Hardware - runs_on: - type: string - required: true - description: Hardware to run integration tests -jobs: - integration_tests: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: ${{ inputs.runs_on }} - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install - run: | - make install-integration-tests - - name: Run tests - run: | - export DOCKER_VOLUME=/mnt/cache - export DOCKER_IMAGE=${{ inputs.docker_image }} - export DOCKER_DEVICES=${{ inputs.docker_devices }} - export HF_TOKEN=${{ secrets.HF_TOKEN }} - pytest -s -vv integration-tests diff --git a/.github/workflows/load_test.yaml b/.github/workflows/load_test.yaml deleted file mode 100644 index ecfe0fda..00000000 --- a/.github/workflows/load_test.yaml +++ /dev/null @@ -1,43 +0,0 @@ -name: Nightly load test - -on: - schedule: - - cron: '0 0 * * 1-5' - - pull_request: - paths: - - ".github/workflows/load_test.yaml" - branches: - - 'main' - -jobs: - load-tests: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: - group: aws-g5-12xlarge - env: - DOCKER_VOLUME: /cache - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Install k6 - run: | - curl https://github.com/grafana/k6/releases/download/v0.44.0/k6-v0.44.0-linux-amd64.tar.gz -L | tar xvz --strip-components 1 - - - name: Start starcoder - run: | - docker run --name tgi-starcoder --rm --gpus all -p 3000:80 -v /mnt/cache:/data -e HF_TOKEN=${{ secrets.HF_TOKEN }} --pull always -d ghcr.io/huggingface/text-generation-inference:latest --model-id bigcode/starcoder --num-shard 2 --max-batch-total-tokens 32768 - sleep 10 - wget --timeout 10 --retry-on-http-error --waitretry=1 --tries=240 http://localhost:3000/health - - - name: Run k6 - run: | - ./k6 run load_tests/starcoder_load.js - - - name: Stop starcoder - if: ${{ always() }} - run: | - docker stop tgi-starcoder || true diff --git a/.github/workflows/nix_tests.yaml b/.github/workflows/nix_tests.yaml deleted file mode 100644 index f2209f8a..00000000 --- a/.github/workflows/nix_tests.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: "Nix Tests" -on: - pull_request: - paths: - - ".github/workflows/nix_tests.yaml" - - "server/**" - - "proto/**" - - "router/**" - - "launcher/**" - - "Cargo.lock" - - "rust-toolchain.toml" -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - tests: - runs-on: - group: aws-highmemory-32-plus-priv - steps: - - uses: actions/checkout@v4 - - uses: cachix/install-nix-action@v27 - with: - nix_path: nixpkgs=channel:nixos-unstable - - uses: cachix/cachix-action@v14 - with: - name: text-generation-inference - # If you chose signing key for write access - authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' - env: - USER: github_runner - - name: Build - run: nix develop .#test --command echo "Ok" - - name: Pre-commit tests. - run: nix develop .#test --command pre-commit run --all-files - - name: Python tests. - run: nix develop .#test --command python -m pytest server/tests/ - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - - name: Rust tests. - run: nix develop .#test --command cargo test diff --git a/.github/workflows/push_docker_image.yml b/.github/workflows/push_docker_image.yml new file mode 100644 index 00000000..f49e87bb --- /dev/null +++ b/.github/workflows/push_docker_image.yml @@ -0,0 +1,56 @@ +name: Build and push docker image to Github registry + +on: + workflow_dispatch: + inputs: + tag: + description: 'Tag for the Docker image:' + required: true + +jobs: + build-and-push: + concurrency: + group: ${{ github.workflow }} + cancel-in-progress: true + runs-on: ubuntu-latest + permissions: + contents: write + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. + id-token: write + security-events: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Initialize Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + install: true + config-inline: | + [registry."docker.io"] + - name: Login to GitHub Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4.3.0 + with: + flavor: | + latest=true + images: ghcr.io/huggingface/tgi-gaudi + tags: | + type=raw,value=${{ github.event.inputs.tag }} + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@v4 + with: + context: . + file: Dockerfile + push: true + platforms: 'linux/amd64' + tags: ${{ steps.meta.outputs.tags }} diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml deleted file mode 100644 index a5e50a79..00000000 --- a/.github/workflows/stale.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: 'Close stale issues and PRs' -on: - schedule: - - cron: '30 1 * * *' - -jobs: - stale: - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v8 - with: - stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' - days-before-stale: 30 - days-before-close: 5 diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index 4eeca334..00000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,63 +0,0 @@ -name: Server Tests - -on: - pull_request: - paths: - - ".github/workflows/tests.yaml" - - "server/**" - - "proto/**" - - "router/**" - - "launcher/**" - - "Cargo.lock" - - "rust-toolchain.toml" - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - run_tests: - runs-on: - group: aws-highmemory-32-plus-priv - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v4 - id: python - with: - python-version: 3.11 - - name: Install Rust - uses: actions-rs/toolchain@v1 - with: - # Released on: 02 May, 2024 - # https://releases.rs/docs/1.78.0/ - toolchain: 1.80.0 - override: true - components: rustfmt, clippy - - name: Install Protoc - uses: arduino/setup-protoc@v1 - - name: Clean unused files - run: | - sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android - sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET - - name: Install - run: | - sudo apt update - sudo apt install python3.11-dev -y - make install-cpu - - name: Run server tests - run: | - pip install pytest - export HF_TOKEN=${{ secrets.HF_TOKEN }} - pytest -s -vv server/tests - - name: Pre-commit checks - run: | - pip install pre-commit - pre-commit install - pre-commit run --all-files - - name: Run Rust tests - run: | - cargo test - - name: Run Rust tests with google feature - run: | - cargo test --features google diff --git a/.github/workflows/trufflehog.yaml b/.github/workflows/trufflehog.yaml deleted file mode 100644 index b406d43b..00000000 --- a/.github/workflows/trufflehog.yaml +++ /dev/null @@ -1,18 +0,0 @@ -on: - push: - -name: Secret Leaks - -permissions: - contents: read - -jobs: - trufflehog: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Secret Scanning - uses: trufflesecurity/trufflehog@main diff --git a/.github/workflows/upload_pr_documentation.yaml b/.github/workflows/upload_pr_documentation.yaml deleted file mode 100644 index ae00bb51..00000000 --- a/.github/workflows/upload_pr_documentation.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: Upload PR Documentation - -on: - workflow_run: - workflows: ["Build PR Documentation"] - types: - - completed - -jobs: - build: - uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main - with: - package_name: text-generation-inference - secrets: - hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} - comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} diff --git a/Makefile b/Makefile index 3068a06f..664b869d 100644 --- a/Makefile +++ b/Makefile @@ -48,8 +48,5 @@ python-tests: python-server-tests python-client-tests run-falcon-7b-instruct: text-generation-launcher --model-id tiiuae/falcon-7b-instruct --port 8080 -run-falcon-7b-instruct-quantize: - text-generation-launcher --model-id tiiuae/falcon-7b-instruct --quantize bitsandbytes --port 8080 - clean: rm -rf target aml