?

2025-09-12 04:44:52 +00:00 · 2024-10-09 11:42:38 +02:00 · 2024-10-09 11:42:38 +02:00 · b18ed0f443
commit b18ed0f443
parent 3e8d722733
1 changed files with 25 additions and 28 deletions
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -484,6 +484,7 @@ def launcher(event_loop):
        try:
            container = client.containers.get(container_name)
            container.stop()
+            container.remove()
            container.wait()
        except NotFound:
            pass
@@ -513,24 +514,22 @@ def launcher(event_loop):
            device_requests = []
            if not devices:
                devices = None
+            elif devices == ["nvidia.com/gpu=all"]:
+                devices = None
+                device_requests = [
+                    docker.types.DeviceRequest(
+                        driver="cdi",
+                        # count=gpu_count,
+                        device_ids=[f"nvidia.com/gpu={i}"],
+                    )
+                    for i in range(gpu_count)
+                ]
        else:
            devices = []
            device_requests = [
                docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
            ]

-        # raise Exception(
-        #     f"""
-        #     Docoker image: {DOCKER_IMAGE}
-        #     args: {args}
-        #     container name: {container_name}
-        #     env: {env}
-        #     device_requests: {device_requests}
-        #     devices: {devices}
-        # """
-        # )
-        # env.pop("LOG_LEVEL")
-        # env.pop("ROCR_VISIBLE_DEVICES")
        container = client.containers.run(
            DOCKER_IMAGE,
            command=args,
@@ -546,10 +545,7 @@ def launcher(event_loop):
            shm_size="1G",
        )

-        import time
-
-        time.sleep(600)
-
+        try:
            yield ContainerLauncherHandle(client, container.name, port)

            if not use_flash_attention:
@@ -564,6 +560,7 @@ def launcher(event_loop):
            container_output = container.logs().decode("utf-8")
            print(container_output, file=sys.stderr)

+        finally:
            container.remove()

    if DOCKER_IMAGE is not None: