Introduce basic helm chart

Signed-off-by: Wilfried Roset <wilfriedroset@users.noreply.github.com>
This commit is contained in:
Wilfried Roset 2023-11-09 21:19:45 +01:00
parent a5def7c222
commit b7a825d0b5
No known key found for this signature in database
GPG Key ID: 60E3203B4CBA1AEB
10 changed files with 386 additions and 0 deletions

View File

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# Files matching these patterns are left out of the packaged chart.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@ -0,0 +1,29 @@
# Chart metadata for the text-generation-inference Helm chart.
apiVersion: v2
name: text-generation-inference
description: A Helm chart for Huggingface's text generation inference
type: application
# Version of the chart itself.
version: 0.1.0
# Version of the packaged application; quoted so it is always read as a string.
appVersion: "1.1.0"
# Supported Kubernetes versions (the -0 suffix also admits pre-release builds).
kubeVersion: "^1.27.0-0"
home: https://github.com/huggingface/text-generation-inference
annotations:
  "artifacthub.io/license": HFOILv1.0
  # The value is a YAML document embedded as a literal block string.
  "artifacthub.io/links": |
    - name: Upstream Project
      url: https://github.com/huggingface/text-generation-inference
    - name: Documentation
      url: https://huggingface.co/docs/text-generation-inference/index
maintainers:
  - name: wilfriedroset
keywords:
  - bloom
  - deep-learning
  - falcon
  - gpt
  - inference
  - llm
  - nlp
  - pytorch
  - starcoder
  - transformer

View File

@ -0,0 +1,27 @@
# Text Generation Inference chart
Helm chart for deploying [Text Generation Inference](https://huggingface.co/docs/text-generation-inference) to Kubernetes.
## Installation
### Starcoder
Here is an example of the values to pass to the chart in order to deploy [bigcode/starcoderbase-7b](https://huggingface.co/bigcode/starcoderbase-7b)
```yaml
---
args:
  - "--model-id"
  - "bigcode/starcoderbase-7b"
  - "--num-shard"
  - "1"
env:
  HUGGING_FACE_HUB_TOKEN: hf_FIXME
persistence:
  storageClassName: "default"
  accessModes: ["ReadWriteOnce"]
  storage: 150Gi
```
```shell
helm install -f values.yaml starcoder .
```

View File

@ -0,0 +1,22 @@
{{- /*
Post-install notes. Prints how to reach the application: the Ingress URLs when
the Ingress is enabled, otherwise commands matching the configured Service type
(NodePort, LoadBalancer or ClusterIP).
*/ -}}
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "text-generation-inference.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch the status of it by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "text-generation-inference.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "text-generation-inference.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "text-generation-inference.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,62 @@
{{/*
Short name of the chart: .Values.nameOverride when set, otherwise the chart's
own name, clipped to 63 characters with a trailing "-" removed.
*/}}
{{- define "text-generation-inference.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified application name, limited to 63 characters (some Kubernetes
name fields are limited to this by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
*/}}
{{- define "text-generation-inference.fullname" -}}
{{- $release := .Release.Name -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- $full := printf "%s-%s" $release $base -}}
{{- if .Values.fullnameOverride -}}
{{- $full = .Values.fullnameOverride -}}
{{- else if contains $base $release -}}
{{- $full = $release -}}
{{- end -}}
{{- $full | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
"<chart name>-<chart version>" string used as the value of the chart label.
"+" is replaced by "_" and the result is clipped to 63 characters.
*/}}
{{- define "text-generation-inference.chart" -}}
{{- list .Chart.Name .Chart.Version | join "-" | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Labels shared by every resource of the chart: the chart label, the selector
labels, the application version (only when the chart declares one) and the
managing service.
*/}}
{{- define "text-generation-inference.labels" -}}
helm.sh/chart: {{ include "text-generation-inference.chart" . }}
{{ include "text-generation-inference.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Labels used to select the pods of this release: the chart short name and the
release name.
*/}}
{{- define "text-generation-inference.selectorLabels" -}}
{{- $appName := include "text-generation-inference.name" . -}}
{{- $instance := .Release.Name -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ $instance }}
{{- end }}
{{/*
Name of the service account to use.
When .Values.serviceAccount.create is true: .Values.serviceAccount.name, or the
chart fullname when no name is given.
Otherwise: .Values.serviceAccount.name, or "default" when no name is given.
A missing .Values.serviceAccount section is treated as an empty one, so that
including this helper does not fail with a nil-pointer error.
*/}}
{{- define "text-generation-inference.serviceAccountName" -}}
{{- $serviceAccount := default (dict) .Values.serviceAccount -}}
{{- if $serviceAccount.create }}
{{- default (include "text-generation-inference.fullname" .) $serviceAccount.name }}
{{- else }}
{{- default "default" $serviceAccount.name }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,61 @@
{{- /*
Ingress for the application, rendered only when .Values.ingress.enabled is set.
The apiVersion, the ingress class mechanism (spec field vs. annotation), the
pathType field and the backend layout are all selected from the cluster version
reported by .Capabilities.KubeVersion.GitVersion.
*/ -}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "text-generation-inference.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- $kubeVersion := .Capabilities.KubeVersion.GitVersion -}}
{{- $hasClassField := semverCompare ">=1.18-0" $kubeVersion -}}
{{- $isNetworkingV1 := semverCompare ">=1.19-0" $kubeVersion -}}
{{- $isNetworkingV1beta1 := semverCompare ">=1.14-0" $kubeVersion -}}
{{- if and .Values.ingress.className (not $hasClassField) }}
  {{- /* Clusters older than 1.18: pass the class through the annotation, unless one is already set. */}}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className }}
  {{- end }}
{{- end }}
{{- if $isNetworkingV1 -}}
apiVersion: networking.k8s.io/v1
{{- else if $isNetworkingV1beta1 -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if and .Values.ingress.className $hasClassField }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range $tlsEntry := .Values.ingress.tls }}
    - hosts:
        {{- range $tlsHost := $tlsEntry.hosts }}
        - {{ $tlsHost | quote }}
        {{- end }}
      secretName: {{ $tlsEntry.secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range $rule := .Values.ingress.hosts }}
    - host: {{ $rule.host | quote }}
      http:
        paths:
          {{- range $entry := $rule.paths }}
          - path: {{ $entry.path }}
            {{- if and $entry.pathType $hasClassField }}
            pathType: {{ $entry.pathType }}
            {{- end }}
            backend:
              {{- if $isNetworkingV1 }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,22 @@
{{- /*
PodDisruptionBudget for the application pods, rendered only when
.Values.podDisruptionBudget is set.
Kubernetes rejects a budget that sets both minAvailable and maxUnavailable, so
at most one is rendered: minAvailable wins when both values are provided.
*/ -}}
{{- if .Values.podDisruptionBudget }}
apiVersion: {{ .Values.podDisruptionBudget.apiVersion | default "policy/v1" }}
kind: PodDisruptionBudget
metadata:
  name: {{ include "text-generation-inference.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
    {{- with .Values.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if .Values.podDisruptionBudget.minAvailable }}
  minAvailable: {{ .Values.podDisruptionBudget.minAvailable }}
  {{- else if .Values.podDisruptionBudget.maxUnavailable }}
  maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "text-generation-inference.selectorLabels" . | nindent 6 }}
{{- end }}

View File

@ -0,0 +1,16 @@
# Service exposing the application pods of this release.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "text-generation-inference.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      # Falls back to 80, the port exposed by the application container, so
      # that changing service.port alone does not redirect traffic to a port
      # nothing listens on.
      targetPort: {{ .Values.service.targetPort | default 80 }}
      protocol: TCP
      name: http
  selector:
    {{- include "text-generation-inference.selectorLabels" . | nindent 4 }}

View File

@ -0,0 +1,55 @@
# StatefulSet running the text-generation-launcher container, with one
# persistent volume per replica mounted at /data.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{ include "text-generation-inference.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "text-generation-inference.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  # Must reference the Service of this release, which is named after the
  # chart fullname (not a fixed string).
  serviceName: {{ include "text-generation-inference.fullname" . }}
  selector:
    matchLabels:
      {{- include "text-generation-inference.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      name: text-generation-inference
      labels:
        {{- include "text-generation-inference.selectorLabels" . | nindent 8 }}
    spec:
      containers:
        - name: text-generation-inference
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          # values.yaml declares this setting as image.pullPolicy.
          imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }}
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
          command:
            - "text-generation-launcher"
            {{- range .Values.args }}
            - {{ . | quote }}
            {{- end }}
          {{- with .Values.env }}
          env:
            {{- range $key, $value := . }}
            - name: {{ $key }}
              value: {{ $value | quote }}
            {{- end }}
          {{- end }}
          {{- with .Values.resources }}
          # Resource requests/limits from values (e.g. the GPU allocation).
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumeMounts:
            # Backed by the "storage" volumeClaimTemplate below; no explicit
            # pod volume is declared because the claim template provides it.
            - mountPath: "/data"
              name: storage
      restartPolicy: Always
  volumeClaimTemplates:
    - metadata:
        name: storage
      spec:
        accessModes:
          {{- range .Values.persistence.accessModes }}
          - {{ . | quote }}
          {{- end }}
        storageClassName: {{ .Values.persistence.storageClassName | quote }}
        resources:
          requests:
            storage: {{ .Values.persistence.storage }}

View File

@ -0,0 +1,69 @@
---
# Default values for the text-generation-inference chart.

# Number of replicas of the StatefulSet.
replicaCount: 1

image:
  repository: ghcr.io/huggingface/text-generation-inference
  tag: "1.1.0"
  pullPolicy: IfNotPresent

nameOverride: ""
fullnameOverride: ""

service:
  type: ClusterIP
  port: 80
  # Port of the application container the Service forwards to.
  targetPort: 80

resources:
  limits:
    nvidia.com/gpu: "1"
  requests:
    nvidia.com/gpu: "1"

# Arguments appended to the text-generation-launcher command.
args:
  []
  # See: https://huggingface.co/docs/text-generation-inference/basic_tutorials/launcher
  # - "--model-id"
  # - "bigcode/starcoderbase-7b"
  # - "--revision"
  # - "4ab631381edb607557cbb04b6e9a225bad16807c"
  # - "--num-shard"
  # - "1"

# Environment variables of the container, as a name -> value map.
env:
  {}
  # See: https://huggingface.co/settings/tokens
  # HUGGING_FACE_HUB_TOKEN: xxx

ingress:
  enabled: false
  className: ""
  annotations:
    {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: Prefix
          # backend:
          #   service:
          #     name: text-generation-inference
          #     port:
          #       number: 80
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

persistence:
  storageClassName: "default"
  accessModes: ["ReadWriteOnce"]
  storage: 10Gi

## See `kubectl explain poddisruptionbudget.spec` for more
## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
podDisruptionBudget:
  apiVersion: "policy/v1"
  # minAvailable and maxUnavailable are mutually exclusive: Kubernetes rejects
  # a PodDisruptionBudget that sets both. Set only one of them.
  minAvailable: 1
  # maxUnavailable: 1