Introduce basic helm chart

Signed-off-by: Wilfried Roset <wilfriedroset@users.noreply.github.com>
2025-09-11 04:14:52 +00:00 · 2023-11-09 21:19:45 +01:00 · 2023-11-09 21:19:45 +01:00 · b7a825d0b5
commit b7a825d0b5
parent a5def7c222
10 changed files with 386 additions and 0 deletions
--- a/helm/charts/text-generation-inference/.helmignore
+++ b/helm/charts/text-generation-inference/.helmignore
@ -0,0 +1,23 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Common VCS dirs
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Common backup files
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # Various IDEs
 .project
 .idea/
 *.tmproj
 .vscode/
--- a/helm/charts/text-generation-inference/Chart.yaml
+++ b/helm/charts/text-generation-inference/Chart.yaml
@ -0,0 +1,29 @@
 # Chart metadata for the text-generation-inference Helm chart.
 apiVersion: v2
 name: text-generation-inference
 description: A Helm chart for Hugging Face's Text Generation Inference
 type: application
 # Version of the chart itself (SemVer).
 version: 0.1.0
 # Version of the application this chart targets; quoted so it stays a string.
 appVersion: "1.1.0"
 # Supported Kubernetes versions, expressed as a SemVer constraint.
 kubeVersion: "^1.27.0-0"
 home: https://github.com/huggingface/text-generation-inference
 annotations:
  "artifacthub.io/license": HFOILv1.0
  "artifacthub.io/links": |
    - name: Upstream Project
      url: https://github.com/huggingface/text-generation-inference
    - name: Documentation
      url: https://huggingface.co/docs/text-generation-inference/index
 maintainers:
  - name: wilfriedroset
 # Search keywords, kept sorted and free of duplicates.
 keywords:
  - bloom
  - deep-learning
  - falcon
  - gpt
  - inference
  - llm
  - nlp
  - pytorch
  - starcoder
  - transformer
--- a/helm/charts/text-generation-inference/README.md
+++ b/helm/charts/text-generation-inference/README.md
@ -0,0 +1,27 @@
 # Text Generation Inference chart
 Helm chart for deploying [Text Generation Inference](https://huggingface.co/docs/text-generation-inference) to Kubernetes.
 ## Installation
 ### Starcoder
 Here is an example of the values to pass to the chart in order to deploy [bigcode/starcoderbase-7b](https://huggingface.co/bigcode/starcoderbase-7b):
 ```yaml
 ---
 args:
  - "--model-id"
  - "bigcode/starcoderbase-7b"
  - "--num-shard"
  - "1"
 env:
  HUGGING_FACE_HUB_TOKEN: hf_FIXME
 persistence:
  storageClassName: "default"
  accessModes: ["ReadWriteOnce"]
  storage: 150Gi
 ```
 ```shell
 helm install -f values.yaml starcoder .
 ```
--- a/helm/charts/text-generation-inference/templates/NOTES.txt
+++ b/helm/charts/text-generation-inference/templates/NOTES.txt
@ -0,0 +1,22 @@
 {{- /*
 Post-install notes: prints how to reach the application, depending on whether
 an Ingress is enabled and, otherwise, on the Service type.
 */ -}}
 1. Get the application URL by running these commands:
 {{- if .Values.ingress.enabled }}
 {{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
 {{- end }}
 {{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "text-generation-inference.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
 {{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "text-generation-inference.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "text-generation-inference.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
 {{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "text-generation-inference.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
 {{- end }}
--- a/helm/charts/text-generation-inference/templates/_helpers.tpl
+++ b/helm/charts/text-generation-inference/templates/_helpers.tpl
@ -0,0 +1,62 @@
 {{/*
 Short name of the chart: `nameOverride` when set, otherwise the chart name,
 cut to 63 characters with any trailing "-" removed.
 */}}
 {{- define "text-generation-inference.name" -}}
 {{- $base := .Values.nameOverride | default .Chart.Name -}}
 {{- $base | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Fully qualified app name used to name the chart's resources.
 Resolution order:
   1. `fullnameOverride`, when set;
   2. the release name alone, when it already contains the chart name;
   3. "<release name>-<chart name>" otherwise.
 The result is always cut to 63 characters (some Kubernetes name fields are
 limited to this by the DNS naming spec) and stripped of a trailing "-".
 */}}
 {{- define "text-generation-inference.fullname" -}}
 {{- with .Values.fullnameOverride }}
 {{- . | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- $chartName := .Values.nameOverride | default .Chart.Name }}
 {{- $release := .Release.Name }}
 {{- if not (contains $chartName $release) }}
 {{- printf "%s-%s" $release $chartName | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- $release | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{- end }}
 {{- end }}
 {{/*
 Value of the `helm.sh/chart` label: "<chart name>-<chart version>" with "+"
 replaced by "_", cut to 63 characters and stripped of a trailing "-".
 */}}
 {{- define "text-generation-inference.chart" -}}
 {{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
 {{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Common labels: the chart label, the selector labels, the application version
 (only when the chart declares an appVersion) and the managing service.
 */}}
 {{- define "text-generation-inference.labels" -}}
 helm.sh/chart: {{ include "text-generation-inference.chart" . }}
 {{ include "text-generation-inference.selectorLabels" . }}
 {{- with .Chart.AppVersion }}
 app.kubernetes.io/version: {{ quote . }}
 {{- end }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{- end }}
 {{/*
 Selector labels: the name/instance pair that identifies this release's pods.
 Emits `app.kubernetes.io/name` (from the "text-generation-inference.name"
 helper) and `app.kubernetes.io/instance` (the release name).
 */}}
 {{- define "text-generation-inference.selectorLabels" -}}
 app.kubernetes.io/name: {{ include "text-generation-inference.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 {{/*
 Name of the service account to use.
 - When `serviceAccount.create` is true: `serviceAccount.name`, falling back to
   the chart fullname.
 - Otherwise: `serviceAccount.name`, falling back to "default".
 The `serviceAccount` values block is optional: a missing block is treated as
 an empty one so that including this helper never fails on a nil lookup.
 */}}
 {{- define "text-generation-inference.serviceAccountName" -}}
 {{- $sa := .Values.serviceAccount | default dict -}}
 {{- if $sa.create }}
 {{- default (include "text-generation-inference.fullname" .) $sa.name }}
 {{- else }}
 {{- default "default" $sa.name }}
 {{- end }}
 {{- end }}
--- a/helm/charts/text-generation-inference/templates/ingress.yaml
+++ b/helm/charts/text-generation-inference/templates/ingress.yaml
@ -0,0 +1,61 @@
 {{- /*
 Ingress routing every configured host/path to the chart's Service port.
 Rendered only when `ingress.enabled` is true.
 Chart.yaml restricts kubeVersion to "^1.27.0-0", so only the
 networking.k8s.io/v1 schema is emitted; the branches for pre-1.19 clusters
 could never be selected.
 */ -}}
 {{- if .Values.ingress.enabled -}}
 {{- $fullName := include "text-generation-inference.fullname" . -}}
 {{- $svcPort := .Values.service.port -}}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  name: {{ $fullName }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- with .Values.ingress.tls }}
  tls:
    {{- range . }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- /* pathType is a required field in networking.k8s.io/v1. */}}
            pathType: {{ .pathType | default "ImplementationSpecific" }}
            backend:
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
          {{- end }}
    {{- end }}
 {{- end }}
--- a/helm/charts/text-generation-inference/templates/poddisruptionbudget.yaml
+++ b/helm/charts/text-generation-inference/templates/poddisruptionbudget.yaml
@ -0,0 +1,22 @@
 {{- /*
 PodDisruptionBudget for the release's pods; rendered only when
 `podDisruptionBudget` is set.
 Kubernetes rejects a budget that sets both `minAvailable` and
 `maxUnavailable`, so exactly one is emitted: `minAvailable` wins when both
 are provided. A nil check is used rather than `with`, so an explicit 0 is
 still rendered.
 */ -}}
 {{- if .Values.podDisruptionBudget }}
 apiVersion: {{ .Values.podDisruptionBudget.apiVersion | default "policy/v1" }}
 kind: PodDisruptionBudget
 metadata:
  name: {{ include "text-generation-inference.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
    {{- with .Values.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
 spec:
  {{- if not (kindIs "invalid" .Values.podDisruptionBudget.minAvailable) }}
  minAvailable: {{ .Values.podDisruptionBudget.minAvailable }}
  {{- else if not (kindIs "invalid" .Values.podDisruptionBudget.maxUnavailable) }}
  maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "text-generation-inference.selectorLabels" . | nindent 6 }}
 {{- end }}
--- a/helm/charts/text-generation-inference/templates/service.yaml
+++ b/helm/charts/text-generation-inference/templates/service.yaml
@ -0,0 +1,16 @@
 {{- /*
 Service exposing the release's pods on `service.port`.
 */ -}}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "text-generation-inference.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
 spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      {{- /* Falls back to 80, the container port hard-coded in the StatefulSet, when service.targetPort is unset. */}}
      targetPort: {{ .Values.service.targetPort | default 80 }}
      protocol: TCP
      name: http
  selector:
    {{- include "text-generation-inference.selectorLabels" . | nindent 4 }}
--- a/helm/charts/text-generation-inference/templates/statefulset.yaml
+++ b/helm/charts/text-generation-inference/templates/statefulset.yaml
@ -0,0 +1,55 @@
 {{- /*
 StatefulSet running the text-generation-launcher container, with one
 PersistentVolumeClaim per replica (from volumeClaimTemplates) mounted at /data.
 */ -}}
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
  name: {{ include "text-generation-inference.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
 spec:
  replicas: {{ .Values.replicaCount }}
  {{- /* Must match the Service, which is named after the chart fullname. */}}
  serviceName: {{ include "text-generation-inference.fullname" . }}
  selector:
    matchLabels:
      {{- include "text-generation-inference.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      name: text-generation-inference
      labels:
        {{- include "text-generation-inference.selectorLabels" . | nindent 8 }}
    spec:
      containers:
        - name: text-generation-inference
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          {{- /* values.yaml declares image.pullPolicy; image.imagePullPolicy is still honoured as a fallback. */}}
          imagePullPolicy: {{ coalesce .Values.image.pullPolicy .Values.image.imagePullPolicy "IfNotPresent" }}
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
          command:
            - "text-generation-launcher"
            {{- range .Values.args }}
            - {{ . | quote }}
            {{- end }}
          {{- with .Values.env }}
          env:
            {{- range $key, $value := . }}
            - name: {{ $key }}
              value: {{ $value | quote }}
            {{- end }}
          {{- end }}
          {{- with .Values.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumeMounts:
            - mountPath: "/data"
              name: storage
      restartPolicy: Always
  {{- /* The "storage" volume comes from the claim template below; no separate pod-level volume entry is needed. */}}
  volumeClaimTemplates:
    - metadata:
        name: storage
      spec:
        accessModes:
          {{- range .Values.persistence.accessModes }}
          - {{ . | quote }}
          {{- end }}
        {{- with .Values.persistence.storageClassName }}
        storageClassName: {{ . | quote }}
        {{- end }}
        resources:
          requests:
            storage: {{ .Values.persistence.storage }}
--- a/helm/charts/text-generation-inference/values.yaml
+++ b/helm/charts/text-generation-inference/values.yaml
@ -0,0 +1,69 @@
 ---
 # Default values for the text-generation-inference chart.

 # Number of StatefulSet replicas.
 replicaCount: 1
 image:
  repository: ghcr.io/huggingface/text-generation-inference
  tag: "1.1.0"
  pullPolicy: IfNotPresent
 # Overrides for the generated resource names.
 nameOverride: ""
 fullnameOverride: ""
 service:
  type: ClusterIP
  # Port exposed by the Service.
  port: 80
  # Port the Service forwards to; the container listens on 80.
  targetPort: 80
 # Resource requests/limits for the container.
 resources:
  limits:
    nvidia.com/gpu: "1"
  requests:
    nvidia.com/gpu: "1"
 # Arguments appended to the `text-generation-launcher` command.
 # See: https://huggingface.co/docs/text-generation-inference/basic_tutorials/launcher
 args: []
  # - "--model-id"
  # - "bigcode/starcoderbase-7b"
  # - "--revision"
  # - "4ab631381edb607557cbb04b6e9a225bad16807c"
  # - "--num-shard"
  # - "1"
 # Environment variables for the container, as a name -> value map.
 # See: https://huggingface.co/settings/tokens
 env: {}
  # HUGGING_FACE_HUB_TOKEN: xxx
 ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: Prefix
          # backend:
          #   service:
          #     name: text-generation-inference
          #     port:
          #       number: 80
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
 # Settings for the per-replica volume mounted at /data.
 persistence:
  storageClassName: "default"
  accessModes: ["ReadWriteOnce"]
  storage: 10Gi
 ## See `kubectl explain poddisruptionbudget.spec` for more
 ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
 ## Set only ONE of minAvailable / maxUnavailable: Kubernetes rejects a
 ## PodDisruptionBudget that specifies both.
 podDisruptionBudget:
  apiVersion: "policy/v1"
  minAvailable: 1
  # maxUnavailable: 1