Patch summary (free text ahead of the first "diff --git" line):
  * Bump the text-generation-inference image tag from sha-cd5961b to sha-941cd42.
  * liveness_probe and readiness_probe: initial_delay 140 -> 200, timeout 60 -> 20,
    failure_threshold 2 -> 5 (period and success_threshold unchanged).
NOTE(review): line breaks and YAML indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm the context lines match
aml/deployment.yaml before applying.

diff --git a/aml/deployment.yaml b/aml/deployment.yaml
index 102292e10..88d05c307 100644
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -8,7 +8,7 @@ environment_variables:
   MODEL_ID: bigscience/bloom
   NUM_SHARD: 8
 environment:
-  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:sha-cd5961b
+  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:sha-941cd42
   inference_config:
     liveness_route:
       port: 80
@@ -24,15 +24,15 @@ request_settings:
   request_timeout_ms: 60000
   max_concurrent_requests_per_instance: 256
 liveness_probe:
-  initial_delay: 140
-  timeout: 60
+  initial_delay: 200
+  timeout: 20
   period: 60
   success_threshold: 1
-  failure_threshold: 2
+  failure_threshold: 5
 readiness_probe:
-  initial_delay: 140
-  timeout: 60
+  initial_delay: 200
+  timeout: 20
   period: 60
   success_threshold: 1
-  failure_threshold: 2
+  failure_threshold: 5
 instance_count: 1