diff --git a/aml/README.md b/aml/README.md
index c38f9fef..959e2942 100644
--- a/aml/README.md
+++ b/aml/README.md
@@ -2,6 +2,7 @@
 docker build . -t db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation:0.1
 docker push db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation:0.1
 
+az ml model create -f model.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace
 az ml online-endpoint create -f endpoint.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace
 az ml online-deployment create -f deployment.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace
 ```
\ No newline at end of file
diff --git a/aml/deployment.yaml b/aml/deployment.yaml
index f6b55faa..31cb09c5 100644
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -1,9 +1,7 @@
 $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
 name: bloom-deployment
 endpoint_name: bloom-inference
-model:
-  name: bloom
-  path: ./bloom
+model: azureml:bloom:1
 model_mount_path: /var/azureml-model
 environment_variables:
   MODEL_BASE_PATH: /var/azureml-model/bloom
@@ -24,6 +22,7 @@ environment:
 instance_type: Standard_ND96amsr_A100_v4
 request_settings:
   request_timeout_ms: 90000
+  max_concurrent_requests_per_instance: 256
 liveness_probe:
   initial_delay: 300
   timeout: 20
diff --git a/aml/endpoint.yaml b/aml/endpoint.yaml
index 934b31ad..f2f01d5e 100644
--- a/aml/endpoint.yaml
+++ b/aml/endpoint.yaml
@@ -1,3 +1,3 @@
 $schema: https://azuremlsdk2.blob.core.windows.net/latest/managedOnlineEndpoint.schema.json
 name: bloom-inference
-auth_mode: aml_token
+auth_mode: key
diff --git a/aml/model.yaml b/aml/model.yaml
new file mode 100644
index 00000000..e4f1ded2
--- /dev/null
+++ b/aml/model.yaml
@@ -0,0 +1,5 @@
+$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
+name: bloom
+version: 1
+path: ./bloom
+type: custom_model
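
For reference, a minimal sketch of how the endpoint could be exercised once the deployment above is live, given that `auth_mode` is now `key`. The resource group and workspace names are taken from the README commands; the `<primary-key>` and `<scoring-uri>` placeholders and the JSON payload shape are assumptions for illustration, not confirmed by this diff:

```shell
# Fetch the primary/secondary keys for the key-authenticated endpoint (auth_mode: key above).
az ml online-endpoint get-credentials -n bloom-inference \
    -g HuggingFace-BLOOM-ModelPage -w HuggingFace

# Look up the public scoring URI of the endpoint.
az ml online-endpoint show -n bloom-inference \
    -g HuggingFace-BLOOM-ModelPage -w HuggingFace --query scoring_uri

# Call the endpoint with the key; the JSON body is a hypothetical example payload.
curl -H "Authorization: Bearer <primary-key>" \
     -H "Content-Type: application/json" \
     -d '{"inputs": "Hello, my name is"}' \
     <scoring-uri>
```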