mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
?
This commit is contained in:
parent
3e8d722733
commit
b18ed0f443
@ -484,6 +484,7 @@ def launcher(event_loop):
|
||||
try:
|
||||
container = client.containers.get(container_name)
|
||||
container.stop()
|
||||
container.remove()
|
||||
container.wait()
|
||||
except NotFound:
|
||||
pass
|
||||
@ -513,24 +514,22 @@ def launcher(event_loop):
|
||||
device_requests = []
|
||||
if not devices:
|
||||
devices = None
|
||||
elif devices == ["nvidia.com/gpu=all"]:
|
||||
devices = None
|
||||
device_requests = [
|
||||
docker.types.DeviceRequest(
|
||||
driver="cdi",
|
||||
# count=gpu_count,
|
||||
device_ids=[f"nvidia.com/gpu={i}"],
|
||||
)
|
||||
for i in range(gpu_count)
|
||||
]
|
||||
else:
|
||||
devices = []
|
||||
device_requests = [
|
||||
docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
|
||||
]
|
||||
|
||||
# raise Exception(
|
||||
# f"""
|
||||
# Docker image: {DOCKER_IMAGE}
|
||||
# args: {args}
|
||||
# container name: {container_name}
|
||||
# env: {env}
|
||||
# device_requests: {device_requests}
|
||||
# devices: {devices}
|
||||
# """
|
||||
# )
|
||||
# env.pop("LOG_LEVEL")
|
||||
# env.pop("ROCR_VISIBLE_DEVICES")
|
||||
container = client.containers.run(
|
||||
DOCKER_IMAGE,
|
||||
command=args,
|
||||
@ -546,10 +545,7 @@ def launcher(event_loop):
|
||||
shm_size="1G",
|
||||
)
|
||||
|
||||
import time
|
||||
|
||||
time.sleep(600)
|
||||
|
||||
try:
|
||||
yield ContainerLauncherHandle(client, container.name, port)
|
||||
|
||||
if not use_flash_attention:
|
||||
@ -564,6 +560,7 @@ def launcher(event_loop):
|
||||
container_output = container.logs().decode("utf-8")
|
||||
print(container_output, file=sys.stderr)
|
||||
|
||||
finally:
|
||||
container.remove()
|
||||
|
||||
if DOCKER_IMAGE is not None:
|
||||
|
Loading…
Reference in New Issue
Block a user