diff --git a/.github/workflows/ci_build.yaml b/.github/workflows/ci_build.yaml new file mode 100644 index 00000000..754c4850 --- /dev/null +++ b/.github/workflows/ci_build.yaml @@ -0,0 +1,36 @@ +name: CI build + +on: + push: + branches: + - 'main' + tags: + - 'v*' + pull_request: + paths: + - ".github/workflows/build.yaml" + - "integration-tests/**" + - "server/**" + - "proto/**" + - "router/**" + - "launcher/**" + - "Cargo.lock" + - "rust-toolchain.toml" + - "Dockerfile" + - "Dockerfile_amd" + - "Dockerfile_intel" + branches: + - 'main' + +jobs: + build: + strategy: + # super important if you want to see all results, even if one fails + # fail-fast is true by default + fail-fast: false + matrix: + hardware: ["cuda", "rocm", "intel"] + uses: ./.github/workflows/build.yaml # calls the one above ^ + with: + hardware: ${{ matrix.hardware }} + secrets: inherit diff --git a/.github/workflows/integration_tests.yaml b/.github/workflows/integration_tests.yaml new file mode 100644 index 00000000..4e111afe --- /dev/null +++ b/.github/workflows/integration_tests.yaml @@ -0,0 +1,41 @@ +name: Integration tests + +on: + workflow_call: + inputs: + docker_image: + type: string + description: Hardware + required: true + docker_devices: + type: string + description: Hardware + runs_on: + type: string + required: true + description: Hardware to run integration tests +jobs: + integration_tests: + concurrency: + group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + runs-on: ${{ inputs.runs_on }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Inject slug/short variables + uses: rlespinasse/github-slug-action@v4.4.1 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install + run: | + make install-integration-tests + - name: Run tests + run: | + export DOCKER_VOLUME=/mnt/cache + export DOCKER_IMAGE=${{ inputs.docker_image }} + export DOCKER_DEVICES=${{ inputs.docker_devices }} + export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} + pytest -s -vv integration-tests diff --git a/.github/workflows/trufflehog.yml b/.github/workflows/trufflehog.yaml similarity index 100% rename from .github/workflows/trufflehog.yml rename to .github/workflows/trufflehog.yaml diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 2ef85da6..0b239484 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -34,6 +34,7 @@ from text_generation.types import ( DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None) HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", None) DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data") +DOCKER_DEVICES = os.getenv("DOCKER_DEVICES") class ResponseComparator(JSONSnapshotExtension): @@ -453,6 +454,18 @@ def launcher(event_loop): if DOCKER_VOLUME: volumes = [f"{DOCKER_VOLUME}:/data"] + if DOCKER_DEVICES: + devices = DOCKER_DEVICES.split(",") + visible = os.getenv("ROCR_VISIBLE_DEVICES") + if visible: + env["ROCR_VISIBLE_DEVICES"] = visible + device_requests = [] + else: + devices = [] + device_requests = [ + docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]]) + ] + container = client.containers.run( DOCKER_IMAGE, command=args, @@ -460,9 +473,8 @@ def launcher(event_loop): environment=env, auto_remove=False, detach=True, - device_requests=[ - docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]]) - ], + device_requests=device_requests, + devices=devices, volumes=volumes, ports={"80/tcp": port}, shm_size="1G",