mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Introduce basic helm chart
Signed-off-by: Wilfried Roset <wilfriedroset@users.noreply.github.com>
This commit is contained in:
parent
a5def7c222
commit
b7a825d0b5
23
helm/charts/text-generation-inference/.helmignore
Normal file
23
helm/charts/text-generation-inference/.helmignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
29
helm/charts/text-generation-inference/Chart.yaml
Normal file
29
helm/charts/text-generation-inference/Chart.yaml
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: text-generation-inference
|
||||||
|
description: A Helm chart Huggingface's text generation inference
|
||||||
|
type: application
|
||||||
|
version: 0.1.0
|
||||||
|
appVersion: "1.1.0"
|
||||||
|
kubeVersion: "^1.27.0-0"
|
||||||
|
home: https://github.com/huggingface/text-generation-inference
|
||||||
|
annotations:
|
||||||
|
"artifacthub.io/license": HFOILv1.0
|
||||||
|
"artifacthub.io/links": |
|
||||||
|
- name: Upstream Project
|
||||||
|
url: https://github.com/huggingface/text-generation-inference
|
||||||
|
- name: Documentation
|
||||||
|
url: https://huggingface.co/docs/text-generation-inference/index
|
||||||
|
maintainers:
|
||||||
|
- name: wilfriedroset
|
||||||
|
keywords:
|
||||||
|
- bloom
|
||||||
|
- deep-learning
|
||||||
|
- falcon
|
||||||
|
- gpt
|
||||||
|
- inference
|
||||||
|
- inference
|
||||||
|
- llm
|
||||||
|
- nlp
|
||||||
|
- pytorch
|
||||||
|
- starcoder
|
||||||
|
- transformer
|
27
helm/charts/text-generation-inference/README.md
Normal file
27
helm/charts/text-generation-inference/README.md
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# Text Generation Inference chart
|
||||||
|
|
||||||
|
Helm chart for deploying [Text Generation Inference](https://huggingface.co/docs/text-generation-inference) to Kubernetes.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
### Starcoder
|
||||||
|
|
||||||
|
Here is an example of the values to pass to the chart in order to deploy [bigcode/starcoderbase-7b](https://huggingface.co/bigcode/starcoderbase-7b)
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
args:
|
||||||
|
- "--model-id"
|
||||||
|
- "bigcode/starcoderbase-7b"
|
||||||
|
- "--num-shard"
|
||||||
|
- "1"
|
||||||
|
|
||||||
|
env:
|
||||||
|
HUGGING_FACE_HUB_TOKEN: hf_FIXME
|
||||||
|
|
||||||
|
persistence:
|
||||||
|
storageClassName: "default"
|
||||||
|
accessModes: ["ReadWriteOnce"]
|
||||||
|
storage: 150Gi
|
||||||
|
```
|
||||||
|
```shell
|
||||||
|
helm install -f values.yaml startcoder .
|
||||||
|
```
|
22
helm/charts/text-generation-inference/templates/NOTES.txt
Normal file
22
helm/charts/text-generation-inference/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
1. Get the application URL by running these commands:
|
||||||
|
{{- if .Values.ingress.enabled }}
|
||||||
|
{{- range $host := .Values.ingress.hosts }}
|
||||||
|
{{- range .paths }}
|
||||||
|
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- else if contains "NodePort" .Values.service.type }}
|
||||||
|
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "text-generation-inference.fullname" . }})
|
||||||
|
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||||
|
echo http://$NODE_IP:$NODE_PORT
|
||||||
|
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||||
|
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||||
|
You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "text-generation-inference.fullname" . }}'
|
||||||
|
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "text-generation-inference.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||||
|
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||||
|
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||||
|
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "text-generation-inference.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||||
|
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||||
|
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||||
|
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||||
|
{{- end }}
|
62
helm/charts/text-generation-inference/templates/_helpers.tpl
Normal file
62
helm/charts/text-generation-inference/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
{{/*
|
||||||
|
Expand the name of the chart.
|
||||||
|
*/}}
|
||||||
|
{{- define "text-generation-inference.name" -}}
|
||||||
|
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create a default fully qualified app name.
|
||||||
|
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||||
|
If release name contains chart name it will be used as a full name.
|
||||||
|
*/}}
|
||||||
|
{{- define "text-generation-inference.fullname" -}}
|
||||||
|
{{- if .Values.fullnameOverride }}
|
||||||
|
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||||
|
{{- if contains $name .Release.Name }}
|
||||||
|
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create chart name and version as used by the chart label.
|
||||||
|
*/}}
|
||||||
|
{{- define "text-generation-inference.chart" -}}
|
||||||
|
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Common labels
|
||||||
|
*/}}
|
||||||
|
{{- define "text-generation-inference.labels" -}}
|
||||||
|
helm.sh/chart: {{ include "text-generation-inference.chart" . }}
|
||||||
|
{{ include "text-generation-inference.selectorLabels" . }}
|
||||||
|
{{- if .Chart.AppVersion }}
|
||||||
|
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||||
|
{{- end }}
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Selector labels
|
||||||
|
*/}}
|
||||||
|
{{- define "text-generation-inference.selectorLabels" -}}
|
||||||
|
app.kubernetes.io/name: {{ include "text-generation-inference.name" . }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create the name of the service account to use
|
||||||
|
*/}}
|
||||||
|
{{- define "text-generation-inference.serviceAccountName" -}}
|
||||||
|
{{- if .Values.serviceAccount.create }}
|
||||||
|
{{- default (include "text-generation-inference.fullname" .) .Values.serviceAccount.name }}
|
||||||
|
{{- else }}
|
||||||
|
{{- default "default" .Values.serviceAccount.name }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
61
helm/charts/text-generation-inference/templates/ingress.yaml
Normal file
61
helm/charts/text-generation-inference/templates/ingress.yaml
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
{{- if .Values.ingress.enabled -}}
|
||||||
|
{{- $fullName := include "text-generation-inference.fullname" . -}}
|
||||||
|
{{- $svcPort := .Values.service.port -}}
|
||||||
|
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
||||||
|
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
||||||
|
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||||
|
apiVersion: networking.k8s.io/v1beta1
|
||||||
|
{{- else -}}
|
||||||
|
apiVersion: extensions/v1beta1
|
||||||
|
{{- end }}
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: {{ $fullName }}
|
||||||
|
labels:
|
||||||
|
{{- include "text-generation-inference.labels" . | nindent 4 }}
|
||||||
|
{{- with .Values.ingress.annotations }}
|
||||||
|
annotations:
|
||||||
|
{{- toYaml . | nindent 4 }}
|
||||||
|
{{- end }}
|
||||||
|
spec:
|
||||||
|
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
|
||||||
|
ingressClassName: {{ .Values.ingress.className }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Values.ingress.tls }}
|
||||||
|
tls:
|
||||||
|
{{- range .Values.ingress.tls }}
|
||||||
|
- hosts:
|
||||||
|
{{- range .hosts }}
|
||||||
|
- {{ . | quote }}
|
||||||
|
{{- end }}
|
||||||
|
secretName: {{ .secretName }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
rules:
|
||||||
|
{{- range .Values.ingress.hosts }}
|
||||||
|
- host: {{ .host | quote }}
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
{{- range .paths }}
|
||||||
|
- path: {{ .path }}
|
||||||
|
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
|
||||||
|
pathType: {{ .pathType }}
|
||||||
|
{{- end }}
|
||||||
|
backend:
|
||||||
|
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
|
||||||
|
service:
|
||||||
|
name: {{ $fullName }}
|
||||||
|
port:
|
||||||
|
number: {{ $svcPort }}
|
||||||
|
{{- else }}
|
||||||
|
serviceName: {{ $fullName }}
|
||||||
|
servicePort: {{ $svcPort }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
@ -0,0 +1,22 @@
|
|||||||
|
{{- if .Values.podDisruptionBudget }}
|
||||||
|
apiVersion: policy/v1
|
||||||
|
kind: PodDisruptionBudget
|
||||||
|
metadata:
|
||||||
|
name: {{ include "text-generation-inference.fullname" . }}
|
||||||
|
namespace: {{ .Release.Namespace }}
|
||||||
|
labels:
|
||||||
|
{{- include "text-generation-inference.labels" . | nindent 4 }}
|
||||||
|
{{- with .Values.labels }}
|
||||||
|
{{- toYaml . | nindent 4 }}
|
||||||
|
{{- end }}
|
||||||
|
spec:
|
||||||
|
{{- with .Values.podDisruptionBudget.minAvailable }}
|
||||||
|
minAvailable: {{ . }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.podDisruptionBudget.maxUnavailable }}
|
||||||
|
maxUnavailable: {{ . }}
|
||||||
|
{{- end }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
{{- include "text-generation-inference.selectorLabels" . | nindent 6 }}
|
||||||
|
{{- end }}
|
16
helm/charts/text-generation-inference/templates/service.yaml
Normal file
16
helm/charts/text-generation-inference/templates/service.yaml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ include "text-generation-inference.fullname" . }}
|
||||||
|
namespace: {{ .Release.Namespace }}
|
||||||
|
labels:
|
||||||
|
{{- include "text-generation-inference.labels" . | nindent 4 }}
|
||||||
|
spec:
|
||||||
|
type: {{ .Values.service.type }}
|
||||||
|
ports:
|
||||||
|
- port: {{ .Values.service.port }}
|
||||||
|
targetPort: {{ .Values.service.targetPort }}
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
{{- include "text-generation-inference.selectorLabels" . | nindent 4 }}
|
@ -0,0 +1,55 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: {{ include "text-generation-inference.fullname" . }}
|
||||||
|
namespace: {{ .Release.Namespace }}
|
||||||
|
labels:
|
||||||
|
{{- include "text-generation-inference.labels" . | nindent 4 }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
serviceName: text-generation-inference
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
{{- include "text-generation-inference.selectorLabels" . | nindent 6 }}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
name: text-generation-inference
|
||||||
|
labels:
|
||||||
|
{{- include "text-generation-inference.selectorLabels" . | nindent 8 }}
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: text-generation-inference
|
||||||
|
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
|
||||||
|
imagePullPolicy: {{ .Values.image.imagePullPolicy }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
command:
|
||||||
|
- "text-generation-launcher"
|
||||||
|
{{- range .Values.args }}
|
||||||
|
- {{ . | quote }}
|
||||||
|
{{- end }}
|
||||||
|
env:
|
||||||
|
{{- range $key, $value := .Values.env }}
|
||||||
|
- name: {{ $key }}
|
||||||
|
value: {{ $value | quote }}
|
||||||
|
{{- end }}
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: "/data"
|
||||||
|
name: storage
|
||||||
|
volumes:
|
||||||
|
- name: storage
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: storage
|
||||||
|
restartPolicy: Always
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: storage
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
{{- range .Values.persistence.accessModes }}
|
||||||
|
- {{ . | quote }}
|
||||||
|
{{- end }}
|
||||||
|
storageClassName: {{ .Values.persistence.storageClassName }}
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.persistence.storage }}
|
69
helm/charts/text-generation-inference/values.yaml
Normal file
69
helm/charts/text-generation-inference/values.yaml
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
---
|
||||||
|
replicaCount: 1
|
||||||
|
|
||||||
|
image:
|
||||||
|
repository: ghcr.io/huggingface/text-generation-inference
|
||||||
|
tag: "1.1.0"
|
||||||
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
|
nameOverride: ""
|
||||||
|
fullnameOverride: ""
|
||||||
|
|
||||||
|
service:
|
||||||
|
type: ClusterIP
|
||||||
|
port: 80
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "1"
|
||||||
|
requests:
|
||||||
|
nvidia.com/gpu: "1"
|
||||||
|
|
||||||
|
args:
|
||||||
|
[]
|
||||||
|
# See: https://huggingface.co/docs/text-generation-inference/basic_tutorials/launcher
|
||||||
|
# - "--model-id"
|
||||||
|
# - "bigcode/starcoderbase-7b"
|
||||||
|
# - "--revision"
|
||||||
|
# - "4ab631381edb607557cbb04b6e9a225bad16807c"
|
||||||
|
# - "--num-shard"
|
||||||
|
# - "1"
|
||||||
|
|
||||||
|
env:
|
||||||
|
{}
|
||||||
|
# See: https://huggingface.co/settings/tokens
|
||||||
|
# HUGGING_FACE_HUB_TOKEN: xxx
|
||||||
|
|
||||||
|
ingress:
|
||||||
|
enabled: false
|
||||||
|
className: ""
|
||||||
|
annotations:
|
||||||
|
{}
|
||||||
|
# kubernetes.io/ingress.class: nginx
|
||||||
|
# kubernetes.io/tls-acme: "true"
|
||||||
|
hosts:
|
||||||
|
- host: chart-example.local
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
# backend:
|
||||||
|
# service:
|
||||||
|
# name: text-generation-inference
|
||||||
|
# port:
|
||||||
|
# number: 80
|
||||||
|
tls: []
|
||||||
|
# - secretName: chart-example-tls
|
||||||
|
# hosts:
|
||||||
|
# - chart-example.local
|
||||||
|
|
||||||
|
persistence:
|
||||||
|
storageClassName: "default"
|
||||||
|
accessModes: ["ReadWriteOnce"]
|
||||||
|
storage: 10Gi
|
||||||
|
|
||||||
|
## See `kubectl explain poddisruptionbudget.spec` for more
|
||||||
|
## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
|
||||||
|
podDisruptionBudget:
|
||||||
|
apiVersion: "policy/v1"
|
||||||
|
minAvailable: 1
|
||||||
|
maxUnavailable: 1
|
Loading…
Reference in New Issue
Block a user