From f9d0ec376a6ddafb07828913968b9ed7c367fb0c Mon Sep 17 00:00:00 2001 From: OlivierDehaene Date: Mon, 23 Jan 2023 17:11:27 +0100 Subject: [PATCH] feat(docker): Make the image compatible with api-inference (#29) --- Dockerfile | 6 +++--- aml/deployment.yaml | 2 +- router/src/server.rs | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index e8455b038..932d85ac2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 ENV LANG=C.UTF-8 \ LC_ALL=C.UTF-8 \ DEBIAN_FRONTEND=noninteractive \ - MODEL_BASE_PATH=/var/azureml-model \ - MODEL_NAME=bigscience/bloom \ + MODEL_BASE_PATH=/data \ + MODEL_ID=bigscience/bloom-560m \ QUANTIZE=false \ - NUM_GPUS=8 \ + NUM_GPUS=1 \ SAFETENSORS_FAST_GPU=1 \ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ NCCL_ASYNC_ERROR_HANDLING=1 \ diff --git a/aml/deployment.yaml b/aml/deployment.yaml index 51e124b66..59fdf59a3 100644 --- a/aml/deployment.yaml +++ b/aml/deployment.yaml @@ -5,7 +5,7 @@ model: azureml:bloom:1 model_mount_path: /var/azureml-model environment_variables: MODEL_BASE_PATH: /var/azureml-model/bloom - MODEL_NAME: bigscience/bloom + MODEL_ID: bigscience/bloom NUM_GPUS: 8 environment: image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1 diff --git a/router/src/server.rs b/router/src/server.rs index 2e6c473f0..db6c369b2 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -197,6 +197,7 @@ pub async fn run( let app = Router::new() .route("/generate", post(generate)) .layer(Extension(shared_state.clone())) + .route("/", get(health)) .route("/health", get(health)) .layer(Extension(shared_state.clone()));