diff --git a/helm/charts/text-generation-inference/.helmignore b/helm/charts/text-generation-inference/.helmignore
new file mode 100644
index 00000000..0e8a0eb3
--- /dev/null
+++ b/helm/charts/text-generation-inference/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm/charts/text-generation-inference/Chart.yaml b/helm/charts/text-generation-inference/Chart.yaml
new file mode 100644
index 00000000..9548fe69
--- /dev/null
+++ b/helm/charts/text-generation-inference/Chart.yaml
@@ -0,0 +1,28 @@
+apiVersion: v2
+name: text-generation-inference
+description: A Helm chart for Huggingface's text generation inference
+type: application
+version: 0.1.0
+appVersion: "1.1.0"
+kubeVersion: "^1.27.0-0"
+home: https://github.com/huggingface/text-generation-inference
+annotations:
+  "artifacthub.io/license": HFOILv1.0
+  "artifacthub.io/links": |
+    - name: Upstream Project
+      url: https://github.com/huggingface/text-generation-inference
+    - name: Documentation
+      url: https://huggingface.co/docs/text-generation-inference/index
+maintainers:
+  - name: wilfriedroset
+keywords:
+  - bloom
+  - deep-learning
+  - falcon
+  - gpt
+  - inference
+  - llm
+  - nlp
+  - pytorch
+  - starcoder
+  - transformer
diff --git a/helm/charts/text-generation-inference/README.md b/helm/charts/text-generation-inference/README.md
new file mode 100644
index 00000000..741ccfc3
--- /dev/null
+++ b/helm/charts/text-generation-inference/README.md
@@ -0,0 +1,27 @@
+# Text Generation Inference chart
+
+Helm chart for deploying [Text Generation Inference](https://huggingface.co/docs/text-generation-inference) to Kubernetes.
+
+## Installation
+### Starcoder
+
+Here is an example of the values to pass to the chart in order to deploy [bigcode/starcoderbase-7b](https://huggingface.co/bigcode/starcoderbase-7b)
+```yaml
+---
+args:
+  - "--model-id"
+  - "bigcode/starcoderbase-7b"
+  - "--num-shard"
+  - "1"
+
+env:
+  HUGGING_FACE_HUB_TOKEN: hf_FIXME
+
+persistence:
+  storageClassName: "default"
+  accessModes: ["ReadWriteOnce"]
+  storage: 150Gi
+```
+```shell
+helm install -f values.yaml starcoder .
+```
diff --git a/helm/charts/text-generation-inference/templates/NOTES.txt b/helm/charts/text-generation-inference/templates/NOTES.txt
new file mode 100644
index 00000000..86ad0a10
--- /dev/null
+++ b/helm/charts/text-generation-inference/templates/NOTES.txt
@@ -0,0 +1,22 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "text-generation-inference.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "text-generation-inference.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "text-generation-inference.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "text-generation-inference.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/helm/charts/text-generation-inference/templates/_helpers.tpl b/helm/charts/text-generation-inference/templates/_helpers.tpl new file mode 100644 index 00000000..acff96ae --- /dev/null +++ b/helm/charts/text-generation-inference/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "text-generation-inference.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "text-generation-inference.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "text-generation-inference.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "text-generation-inference.labels" -}} +helm.sh/chart: {{ include "text-generation-inference.chart" . }} +{{ include "text-generation-inference.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "text-generation-inference.selectorLabels" -}} +app.kubernetes.io/name: {{ include "text-generation-inference.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "text-generation-inference.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "text-generation-inference.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm/charts/text-generation-inference/templates/ingress.yaml b/helm/charts/text-generation-inference/templates/ingress.yaml new file mode 100644 index 00000000..7b285a3d --- /dev/null +++ b/helm/charts/text-generation-inference/templates/ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "text-generation-inference.fullname" . 
-}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "text-generation-inference.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/charts/text-generation-inference/templates/poddisruptionbudget.yaml b/helm/charts/text-generation-inference/templates/poddisruptionbudget.yaml new file mode 100644 index 00000000..b0282e09 --- /dev/null +++ b/helm/charts/text-generation-inference/templates/poddisruptionbudget.yaml @@ -0,0 +1,22 @@ +{{- if .Values.podDisruptionBudget }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "text-generation-inference.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "text-generation-inference.labels" . | nindent 4 }} + {{- with .Values.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ . }} + {{- end }} + {{- with .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ . }} + {{- end }} + selector: + matchLabels: + {{- include "text-generation-inference.selectorLabels" . | nindent 6 }} +{{- end }} diff --git a/helm/charts/text-generation-inference/templates/service.yaml b/helm/charts/text-generation-inference/templates/service.yaml new file mode 100644 index 00000000..98ff5283 --- /dev/null +++ b/helm/charts/text-generation-inference/templates/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "text-generation-inference.fullname" . 
}}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "text-generation-inference.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    # Fall back to the service port when targetPort is not set in values.yaml
+    # (values.yaml does not define service.targetPort by default).
+    - port: {{ .Values.service.port }}
+      targetPort: {{ .Values.service.targetPort | default .Values.service.port }}
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "text-generation-inference.selectorLabels" . | nindent 4 }}
diff --git a/helm/charts/text-generation-inference/templates/statefulset.yaml b/helm/charts/text-generation-inference/templates/statefulset.yaml
new file mode 100644
index 00000000..471259d3
--- /dev/null
+++ b/helm/charts/text-generation-inference/templates/statefulset.yaml
@@ -0,0 +1,65 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: {{ include "text-generation-inference.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "text-generation-inference.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  serviceName: text-generation-inference
+  selector:
+    matchLabels:
+      {{- include "text-generation-inference.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      name: text-generation-inference
+      labels:
+        {{- include "text-generation-inference.selectorLabels" . | nindent 8 }}
+    spec:
+      containers:
+        - name: text-generation-inference
+          image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
+          # values.yaml defines image.pullPolicy (not image.imagePullPolicy).
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            - containerPort: 80
+          command:
+            - "text-generation-launcher"
+          {{- range .Values.args }}
+            - {{ . | quote }}
+          {{- end }}
+          env:
+          {{- range $key, $value := .Values.env }}
+            - name: {{ $key }}
+              value: {{ $value | quote }}
+          {{- end }}
+          # Wire in .Values.resources so the GPU requests/limits actually apply.
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            # Mount name must match the volumeClaimTemplates metadata.name below;
+            # a static `volumes:` entry is not needed with volumeClaimTemplates.
+            - mountPath: "/data"
+              name: {{ .Release.Name }}-storage
+      restartPolicy: Always
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: {{ .Release.Name }}-storage + spec: + accessModes: + {{- range .Values.persistence.accessModes }} + - {{ . | quote }} + {{- end }} + storageClassName: {{ .Values.persistence.storageClassName }} + resources: + requests: + storage: {{ .Values.persistence.storage }} diff --git a/helm/charts/text-generation-inference/values.yaml b/helm/charts/text-generation-inference/values.yaml new file mode 100644 index 00000000..44cfbda8 --- /dev/null +++ b/helm/charts/text-generation-inference/values.yaml @@ -0,0 +1,69 @@ +--- +replicaCount: 1 + +image: + repository: ghcr.io/huggingface/text-generation-inference + tag: "1.1.0" + pullPolicy: IfNotPresent + +nameOverride: "" +fullnameOverride: "" + +service: + type: ClusterIP + port: 80 + +resources: + limits: + nvidia.com/gpu: "1" + requests: + nvidia.com/gpu: "1" + +args: + [] + # See: https://huggingface.co/docs/text-generation-inference/basic_tutorials/launcher + # - "--model-id" + # - "bigcode/starcoderbase-7b" + # - "--revision" + # - "4ab631381edb607557cbb04b6e9a225bad16807c" + # - "--num-shard" + # - "1" + +env: + {} + # See: https://huggingface.co/settings/tokens + # HUGGING_FACE_HUB_TOKEN: xxx + +ingress: + enabled: false + className: "" + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: Prefix + # backend: + # service: + # name: text-generation-inference + # port: + # number: 80 + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +persistence: + storageClassName: "default" + accessModes: ["ReadWriteOnce"] + storage: 10Gi + +## See `kubectl explain poddisruptionbudget.spec` for more +## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ +podDisruptionBudget: + apiVersion: "policy/v1" + 
minAvailable: 1
+  # maxUnavailable: 1  # set at most one of minAvailable/maxUnavailable