From f9d0ec376a6ddafb07828913968b9ed7c367fb0c Mon Sep 17 00:00:00 2001
From: OlivierDehaene <olivier@huggingface.co>
Date: Mon, 23 Jan 2023 17:11:27 +0100
Subject: [PATCH] feat(docker): Make the image compatible with api-inference
 (#29)

---
 Dockerfile           | 6 +++---
 aml/deployment.yaml  | 2 +-
 router/src/server.rs | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e8455b03..932d85ac 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
 ENV LANG=C.UTF-8 \
     LC_ALL=C.UTF-8 \
     DEBIAN_FRONTEND=noninteractive \
-    MODEL_BASE_PATH=/var/azureml-model \
-    MODEL_NAME=bigscience/bloom \
+    MODEL_BASE_PATH=/data \
+    MODEL_ID=bigscience/bloom-560m \
     QUANTIZE=false \
-    NUM_GPUS=8 \
+    NUM_GPUS=1 \
     SAFETENSORS_FAST_GPU=1 \
     CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
     NCCL_ASYNC_ERROR_HANDLING=1 \
diff --git a/aml/deployment.yaml b/aml/deployment.yaml
index 51e124b6..59fdf59a 100644
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -5,7 +5,7 @@ model: azureml:bloom:1
 model_mount_path: /var/azureml-model
 environment_variables:
   MODEL_BASE_PATH: /var/azureml-model/bloom
-  MODEL_NAME: bigscience/bloom
+  MODEL_ID: bigscience/bloom
   NUM_GPUS: 8
 environment:
   image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
diff --git a/router/src/server.rs b/router/src/server.rs
index 2e6c473f..db6c369b 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -197,6 +197,7 @@ pub async fn run(
     let app = Router::new()
         .route("/generate", post(generate))
         .layer(Extension(shared_state.clone()))
+        .route("/", get(health))
         .route("/health", get(health))
         .layer(Extension(shared_state.clone()));