diff --git a/aml/README.md b/aml/README.md
index c38f9fef..959e2942 100644
--- a/aml/README.md
+++ b/aml/README.md
@@ -2,6 +2,7 @@
 docker build . -t db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation:0.1
 docker push db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation:0.1
 
+az ml model create -f model.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace
 az ml online-endpoint create -f endpoint.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace
 az ml online-deployment create -f deployment.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace
 ```
\ No newline at end of file
diff --git a/aml/deployment.yaml b/aml/deployment.yaml
index f6b55faa..31cb09c5 100644
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -1,9 +1,7 @@
 $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
 name: bloom-deployment
 endpoint_name: bloom-inference
-model:
-  name: bloom
-  path: ./bloom
+model: azureml:bloom:1
 model_mount_path: /var/azureml-model
 environment_variables:
   MODEL_BASE_PATH: /var/azureml-model/bloom
@@ -24,6 +22,7 @@ environment:
 instance_type: Standard_ND96amsr_A100_v4
 request_settings:
   request_timeout_ms: 90000
+  max_concurrent_requests_per_instance: 256
 liveness_probe:
   initial_delay: 300
   timeout: 20
diff --git a/aml/endpoint.yaml b/aml/endpoint.yaml
index 934b31ad..f2f01d5e 100644
--- a/aml/endpoint.yaml
+++ b/aml/endpoint.yaml
@@ -1,3 +1,3 @@
 $schema: https://azuremlsdk2.blob.core.windows.net/latest/managedOnlineEndpoint.schema.json
 name: bloom-inference
-auth_mode: aml_token
+auth_mode: key
diff --git a/aml/model.yaml b/aml/model.yaml
new file mode 100644
index 00000000..e4f1ded2
--- /dev/null
+++ b/aml/model.yaml
@@ -0,0 +1,5 @@
+$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
+name: bloom
+version: 1
+path: ./bloom
+type: custom_model
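
For reference, a minimal sketch of how the endpoint could be exercised once the deployment above is live, given that `auth_mode` is now `key`. The resource group and workspace names are taken from the README commands; the `<primary-key>` and `<scoring-uri>` placeholders and the JSON payload shape are assumptions for illustration, not confirmed by this diff:

```shell
# Fetch the primary/secondary keys for the key-authenticated endpoint (auth_mode: key above).
az ml online-endpoint get-credentials -n bloom-inference \
    -g HuggingFace-BLOOM-ModelPage -w HuggingFace

# Look up the public scoring URI of the endpoint.
az ml online-endpoint show -n bloom-inference \
    -g HuggingFace-BLOOM-ModelPage -w HuggingFace --query scoring_uri

# Call the endpoint with the key; the JSON body is a hypothetical example payload.
curl -H "Authorization: Bearer <primary-key>" \
     -H "Content-Type: application/json" \
     -d '{"inputs": "Hello, my name is"}' \
     <scoring-uri>
```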