From d503e8f09d23821239b3845974f6e56f013d3d2c Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Wed, 29 Mar 2023 21:38:30 +0200
Subject: [PATCH] feat: aws sagemaker compatible image (#147)

The only difference is that now it pushes to
registry.internal.huggingface.tech/api-inference/community/text-generation-inference/sagemaker:...
instead of
registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sagemaker-...

---------

Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
---
Review notes (text in this section is not part of the commit message):
- Line structure and indentation of this patch were reconstructed from a
  whitespace-collapsed copy; hunk line counts were re-checked against the
  @@ headers.
- sagemaker-entrypoint.sh now exports MODEL_ID from HF_MODEL_ID, sends the
  validation error to stderr and exec's the launcher; its hunk header and
  the diffstat below were updated accordingly.
- The post-image blob ids on the "index" lines of Dockerfile and
  sagemaker-entrypoint.sh predate these review edits; regenerate the patch
  with git format-patch after applying if exact ids are needed.

 .github/workflows/build.yaml | 46 +++++++++++++++++++++++++++++++++++-
 Dockerfile                   | 13 +++++++++-
 router/src/server.rs         | 10 +++++++-
 sagemaker-entrypoint.sh      | 25 ++++++++++++++++++++
 4 files changed, 91 insertions(+), 3 deletions(-)
 create mode 100755 sagemaker-entrypoint.sh

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 56015177..2090d142 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -83,4 +83,48 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
-          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
\ No newline at end of file
+          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
+
+  build-and-push-sagemaker-image:
+    needs:
+      - build-and-push-image
+    runs-on: ubuntu-latest
+    steps:
+      - name: Initialize Docker Buildx
+        uses: docker/setup-buildx-action@v2.0.0
+        with:
+          install: true
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4
+      - name: Login to internal Container Registry
+        uses: docker/login-action@v2.1.0
+        with:
+          username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
+          password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
+          registry: registry.internal.huggingface.tech
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4.3.0
+        with:
+          flavor: |
+            latest=auto
+          images: |
+            registry.internal.huggingface.tech/api-inference/community/text-generation-inference/sagemaker
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
+            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          platforms: 'linux/amd64'
+          target: sagemaker
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 592f1f72..85463af1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -27,7 +27,7 @@ COPY router router
 COPY launcher launcher
 RUN cargo build --release
 
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as base
 
 ENV LANG=C.UTF-8 \
     LC_ALL=C.UTF-8 \
@@ -76,5 +76,16 @@ COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bi
 # Install launcher
 COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
 
+# AWS Sagemaker compatible image
+FROM base as sagemaker
+
+COPY sagemaker-entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
+
+# Original image
+FROM base
+
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
\ No newline at end of file
diff --git a/router/src/server.rs b/router/src/server.rs
index 3b63ec8a..f7850053 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -529,11 +529,19 @@ pub async fn run(
     // Create router
     let app = Router::new()
         .merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi()))
+        // Base routes
         .route("/", post(compat_generate))
         .route("/generate", post(generate))
         .route("/generate_stream", post(generate_stream))
-        .route("/", get(health))
+        // AWS Sagemaker route
+        .route("/invocations", post(compat_generate))
+        // Base Health route
         .route("/health", get(health))
+        // Inference API health route
+        .route("/", get(health))
+        // AWS Sagemaker health route
+        .route("/ping", get(health))
+        // Prometheus metrics route
         .route("/metrics", get(metrics))
         .layer(Extension(compat_return_full_text))
         .layer(Extension(infer))
diff --git a/sagemaker-entrypoint.sh b/sagemaker-entrypoint.sh
new file mode 100755
index 00000000..711e3721
--- /dev/null
+++ b/sagemaker-entrypoint.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Translate the HF_MODEL_* / SM_* container variables into the un-prefixed
+# names exported below before starting text-generation-launcher on port 8080.
+
+if [[ -z "${HF_MODEL_ID}" ]]; then
+  echo "HF_MODEL_ID must be set" >&2
+  exit 1
+fi
+export MODEL_ID="${HF_MODEL_ID}"
+
+if [[ -n "${HF_MODEL_REVISION}" ]]; then
+  export REVISION="${HF_MODEL_REVISION}"
+fi
+
+if [[ -n "${SM_NUM_GPUS}" ]]; then
+  export NUM_SHARD="${SM_NUM_GPUS}"
+fi
+
+if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then
+  export QUANTIZE="${HF_MODEL_QUANTIZE}"
+fi
+
+# exec replaces the shell so the launcher receives container signals directly.
+exec text-generation-launcher --port 8080