diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..8d5c446
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,36 @@
+name: Release
+
+on:
+  push:
+    tags:
+      - '*'
+
+env:
+  REGISTRY_URL: registry-1.docker.io
+  REGISTRY_REPO: shalb/charts
+
+jobs:
+  release:
+    name: Release helm charts
+    runs-on: ubuntu-latest
+    container: alpine/helm:3.12.0
+    steps:
+      - uses: actions/checkout@v4
+
+      # Strips the leading refs/<type>/ from the pushed ref; the tag name becomes the chart version.
+      - name: Set env
+        run: |
+          echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
+
+      # Packages every top-level directory that contains a Chart.yaml and pushes it to the OCI registry.
+      - name: Build and push
+        env:
+          # Secrets go through env vars so they are never interpolated into the script text.
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+        run: |
+          echo "${DOCKERHUB_TOKEN}" | helm registry login -u "${DOCKERHUB_USERNAME}" --password-stdin ${{ env.REGISTRY_URL}}
+          for CHART_NAME in $(find -maxdepth 2 -type f -name "Chart.yaml" | cut -d"/" -f2); do
+            helm package --version ${{ env.RELEASE_VERSION }} ${CHART_NAME}
+            helm push ${CHART_NAME}-${{ env.RELEASE_VERSION }}.tgz oci://${{ env.REGISTRY_URL}}/${{ env.REGISTRY_REPO}}
+          done
diff --git a/huggingface-model/.helmignore b/huggingface-model/.helmignore
new file mode 100644
index 0000000..0e8a0eb
--- /dev/null
+++ b/huggingface-model/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/huggingface-model/Chart.yaml b/huggingface-model/Chart.yaml
new file mode 100644
index 0000000..da2de9f
--- /dev/null
+++ b/huggingface-model/Chart.yaml
@@ -0,0 +1,8 @@
+apiVersion: v2
+name: huggingface-model
+description: Helm chart for deploy Hugging Face to kubernetes cluster. 
See [Hugging Face models](https://huggingface.co/models) + +type: application + +version: 0.0.1 + diff --git a/huggingface-model/README.md b/huggingface-model/README.md new file mode 100644 index 0000000..89430b8 --- /dev/null +++ b/huggingface-model/README.md @@ -0,0 +1,60 @@ +# Helm chart for deploying Hugging Face models to a Kubernetes cluster + +See [Hugging Face models](https://huggingface.co/models) + +## Parameters + +### Model + +| Name | Description | Value | +| --------------------------- | ---------------------------------------------------- | ----------------------------------------------------- | +| `model.organization` | Models' company name on huggingface, required! | `""` | +| `model.name` | Models' name on huggingface, required! | `""` | +| `init.s3.enabled` | Turn on/off s3 data source Default: disabled | `false` | +| `init.s3.bucketURL` | Full s3 URL included path to model's folder | `s3://k8s-model-zephyr/llm/deployment/segmind/SSD-1B` | +| `huggingface.containerPort` | Deployment/StatefulSet ContainerPort, optional | `8080` | +| `huggingface.args` | Additional arg for text-generation-launcher optional | `[]` | + +### Global + +| Name | Description | Value | +| --------------------------------- | ------------------------------------------------------------------------------------------------ | ----------------------------------------------- | +| `replicaCount` | Deployment/StatefulSet replicaCount | `1` | +| `kind` | Resource kind [allowed values: deployment/StatefulSet, optional] | `deployment` | +| `image.repo` | Huggingface image repo | `ghcr.io/huggingface/text-generation-inference` | +| `image.tag` | Huggingface image version | `latest` | +| `image.pullPolicy` | Huggingface image pull policy | `IfNotPresent` | +| `imagePullSecrets` | May need if used private repo as a cache for image ghcr.io/huggingface/text-generation-inference | `[]` | +| `nameOverride` | String to partially override common.names.name | `""` | +| `fullnameOverride` | String to fully 
override common.names.fullname | `""` | +| `persistence.accessModes` | PVC accessModes | `["ReadWriteOnce"]` | +| `persistence.storageClassName` | Kubernetes storageClass name | `gp2` | +| `persistence.storage` | Volume size | `100Gi` | +| `service.port` | Service port, default 8080 | `8080` | +| `service.type` | Service type, default ClusterIP | `ClusterIP` | +| `serviceAccount.create` | Enable/disable service account, default enabled | `true` | +| `serviceAccount.role` | Kubernetes role configuration, default nil | `{}` | +| `podAnnotations` | Annotations for model pods | `{}` | +| `securityContext` | Set pod's Security Context fsGroup | `{}` | +| `extraEnvVars` | Array with extra environment variables to add to main pod | `[]` | +| `ingresses.enabled` | Enable/disable ingress(es) for model API, default disabled | `false` | +| `ingresses.configs` | List of ingresses configs | `[]` | +| `livenessProbe` | Configure extra options for model liveness probe | `{}` | +| `readinessProbe` | Configure extra options for model readiness probe | `{}` | +| `startupProbe` | Configure extra options for model startup probe | `{}` | +| `pdb.create` | Specifies whether a PodDisruptionBudget should be created | `false` | +| `pdb.minAvailable` | Min number of pods that must still be available after the eviction | `1` | +| `pdb.maxUnavailable` | Max number of pods that can be unavailable after the eviction | `""` | +| `resources.limits.nvidia.com/gpu` | The required option by text-generation-launcher | `1` | +| `resources.requests.cpu` | The requested CPU minimal recommended value | `3` | +| `resources.requests.memory` | The requested memory minimal recommended size | `10Gi` | +| `extraVolumes` | Optionally specify extra list of additional volumes for models' pods | `[]` | +| `extraVolumeMounts` | Optionally specify extra list of additional volumeMounts for models' container | `[]` | +| `autoscaling.enabled` | Enable Horizontal POD autoscaling for model | `true` | +| 
`autoscaling.minReplicas` | Minimum number of model replicas | `1` | +| `autoscaling.maxReplicas` | Maximum number of model replicas | `5` | +| `autoscaling.targetCPU` | Target CPU utilization percentage | `50` | +| `autoscaling.targetMemory` | Target Memory utilization percentage | `50` | +| `affinity` | Affinity for pod assignment | `{}` | +| `nodeSelector` | Node labels for pod assignment | `{}` | +| `tolerations` | Tolerations for pod assignment | `[]` | diff --git a/huggingface-model/templates/_helpers.tpl b/huggingface-model/templates/_helpers.tpl new file mode 100644 index 0000000..a6a2a88 --- /dev/null +++ b/huggingface-model/templates/_helpers.tpl @@ -0,0 +1,83 @@ +{{/* +Generate internal container port. +*/}} +{{- define "huggingface-model.containerPort" -}} +{{- if .Values.huggingface }} +{{- default 8080 .Values.huggingface.containerPort }} +{{- else }} +8080 +{{- end }} +{{- end}} +{{- define "common.capabilities.kubeVersion" -}} +{{- if .Values.global }} + {{- if .Values.global.kubeVersion }} + {{- .Values.global.kubeVersion -}} + {{- else }} + {{- default .Capabilities.KubeVersion.Version .Values.kubeVersion -}} + {{- end -}} +{{- else }} +{{- default .Capabilities.KubeVersion.Version .Values.kubeVersion -}} +{{- end -}} +{{- end -}} +{{/* +Expand the name of the chart. +*/}} +{{- define "huggingface-model.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}}
+{{- define "huggingface-model.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "huggingface-model.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "huggingface-model.labels" -}}
+helm.sh/chart: {{ include "huggingface-model.chart" . }}
+{{ include "huggingface-model.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "huggingface-model.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "huggingface-model.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "huggingface-model.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "huggingface-model.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/huggingface-model/templates/application.yaml b/huggingface-model/templates/application.yaml
new file mode 100644
index 0000000..8847766
--- /dev/null
+++ b/huggingface-model/templates/application.yaml
@@ -0,0 +1,147 @@
+{{/*
+Renders the model workload (Deployment or StatefulSet): an init container
+fetches the model into the PVC mounted at /data, then the model container
+runs text-generation-launcher.
+*/}}
+{{- if or ( not .Values.model ) ( or (not .Values.model.name) (not .Values.model.organization) ) -}}
+{{- fail "model.name and model.organization are required but not set!" -}}
+{{- end }}
+{{- /* Kubernetes kinds are case-sensitive: accept any casing of the value and emit the canonical spelling. */}}
+{{- $kindKey := .Values.kind | default "deployment" | toString | lower }}
+{{- if not (has $kindKey (list "deployment" "statefulset")) }}
+{{- fail (printf "kind must be deployment or StatefulSet (case-insensitive), got %q" $kindKey) }}
+{{- end }}
+{{- $kind := ternary "StatefulSet" "Deployment" (eq $kindKey "statefulset") }}
+{{- /* Hub repository id: an explicit model.id wins, otherwise <organization>/<name>. */}}
+{{- $modelId := .Values.model.id | default (printf "%s/%s" .Values.model.organization .Values.model.name) -}}
+apiVersion: apps/v1
+kind: {{ $kind }}
+metadata:
+  annotations:
+    reloader.stakater.com/auto: "true"
+  name: {{ include "huggingface-model.fullname" . }}
+  labels:
+    {{- include "huggingface-model.labels" . | nindent 4 }}
+spec:
+  {{- /* With the HPA enabled it owns the replica count; setting it here would reset it on every upgrade. */}}
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  {{- if eq $kind "StatefulSet" }}
+  {{- /* serviceName is a StatefulSet spec field, not a pod spec field. */}}
+  serviceName: {{ include "huggingface-model.fullname" . }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "huggingface-model.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "huggingface-model.selectorLabels" . | nindent 8 }}
+        {{- if .Values.pdb.create }}
+        pdbEnabled: {{ include "huggingface-model.fullname" . }}
+        {{- end }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.serviceAccount.create }}
+      serviceAccountName: {{ include "huggingface-model.serviceAccountName" . }}
+      {{- end }}
+      {{- /* NOTE(review): values.yaml documents `securityContext` while this template reads `podSecurityContext` - confirm which key is intended. */}}
+      {{- with .Values.podSecurityContext }}
+      securityContext:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      initContainers:
+        - name: init
+          image: alpine:3.18.4
+          imagePullPolicy: "IfNotPresent"
+          command: [ "/bin/sh", "-c" ]
+          # `sh -c` executes only its first argument, so the whole download is a single script.
+          args:
+            - |
+              set -e
+              if [ -d "/data/{{ .Values.model.name }}" ]; then echo "Model {{ $modelId }} is already downloaded. Skipping init..."; exit 0; fi
+              {{- if .Values.init.s3.enabled }}
+              apk add --update aws-cli
+              aws s3 cp --recursive "{{ .Values.init.s3.bucketURL }}" "/data/{{ .Values.model.name }}"
+              {{- else }}
+              apk add --update git-lfs
+              git clone --depth=1 "https://huggingface.co/{{ $modelId }}" "/data/{{ .Values.model.name }}"
+              {{- end }}
+          volumeMounts:
+            - name: model-storage
+              mountPath: /data
+          resources:
+            requests:
+              cpu: "3"
+      containers:
+        - name: model
+          image: {{ .Values.image.repo }}:{{ .Values.image.tag }}
+          imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }}
+          command: [ "text-generation-launcher" ]
+          # NOTE(review): the init container stores the model under /data/<name> while the
+          # cache flags below point at /usr/src/<name> - confirm the intended path.
+          args:
+            - "--model-id"
+            - "{{ $modelId }}"
+            - "--huggingface-hub-cache"
+            - "/usr/src/{{ .Values.model.name }}"
+            - "--weights-cache-override"
+            - "/usr/src/{{ .Values.model.name }}"
+            {{- with .Values.huggingface.args }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          ports:
+            - containerPort: {{ include "huggingface-model.containerPort" . }}
+          env:
+            - name: PORT
+              # Env var values must be strings, so the port number is quoted.
+              value: {{ include "huggingface-model.containerPort" . | quote }}
+            - name: HUGGINGFACE_OFFLINE
+              value: "1"
+            {{- with .Values.extraEnvVars }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          {{- with .Values.livenessProbe }}
+          livenessProbe: {{ toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe: {{ toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe: {{ toYaml . | nindent 12 }}
+          {{- end }}
+          volumeMounts:
+            - name: model-storage
+              mountPath: /data
+            {{- with .Values.extraVolumeMounts }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+        - name: model-storage
+          persistentVolumeClaim:
+            claimName: {{ include "huggingface-model.fullname" . }}
+        {{- with .Values.extraVolumes }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
diff --git a/huggingface-model/templates/auth-secret.yaml b/huggingface-model/templates/auth-secret.yaml
new file mode 100644
index 0000000..bb050c1
--- /dev/null
+++ b/huggingface-model/templates/auth-secret.yaml
@@ -0,0 +1,9 @@
+{{- if .Values.authSecret }}
+apiVersion: v1
+data:
+  auth: {{ .Values.authSecret.basicAuth.authHash }}
+kind: Secret
+metadata:
+  name: {{ .Values.authSecret.basicAuth.secretName }}
+{{- end }}
+
diff --git a/huggingface-model/templates/hpa.yaml b/huggingface-model/templates/hpa.yaml
new file mode 100644
index 0000000..aed9b19
--- /dev/null
+++ b/huggingface-model/templates/hpa.yaml
@@ -0,0 +1,48 @@
+{{- if .Values.autoscaling.enabled }}
+{{- /* scaleTargetRef.kind must use the canonical, case-sensitive kind name. */}}
+{{- $kind := ternary "StatefulSet" "Deployment" (eq (.Values.kind | default "deployment" | toString | lower) "statefulset") }}
+{{- /* targetAverageUtilization is the autoscaling/v2beta1 schema; autoscaling/v2 is only served from Kubernetes 1.23 on. */}}
+{{- if semverCompare "<1.23-0" (include "common.capabilities.kubeVersion" .) }}
+apiVersion: autoscaling/v2beta1
+{{- else }}
+apiVersion: autoscaling/v2
+{{- end }}
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "huggingface-model.fullname" . }}
+  labels:
+    {{- include "huggingface-model.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: {{ $kind }}
+    name: {{ include "huggingface-model.fullname" . }}
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.autoscaling.targetMemory }}
+    - type: Resource
+      resource:
+        name: memory
+        {{- if semverCompare "<1.23-0" (include "common.capabilities.kubeVersion" .) }}
+        targetAverageUtilization: {{ .Values.autoscaling.targetMemory }}
+        {{- else }}
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetMemory }}
+        {{- end }}
+    {{- end }}
+    {{- if .Values.autoscaling.targetCPU }}
+    - type: Resource
+      resource:
+        name: cpu
+        {{- if semverCompare "<1.23-0" (include "common.capabilities.kubeVersion" .) 
}}
+        targetAverageUtilization: {{ .Values.autoscaling.targetCPU }}
+        {{- else }}
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPU }}
+        {{- end }}
+    {{- end }}
+
+{{- end }}
diff --git a/huggingface-model/templates/ingress.yaml b/huggingface-model/templates/ingress.yaml
new file mode 100644
index 0000000..691883f
--- /dev/null
+++ b/huggingface-model/templates/ingress.yaml
@@ -0,0 +1,62 @@
+{{- if .Values.ingresses.enabled -}}
+{{- $gitVersion := .Capabilities.KubeVersion.GitVersion }}
+{{- $labels := include "huggingface-model.labels" . -}}
+{{- $fullName := include "huggingface-model.fullname" . -}}
+{{- range $index, $_ := .Values.ingresses.configs }}
+---
+{{ if semverCompare ">=1.19-0" $gitVersion -}}
+apiVersion: networking.k8s.io/v1
+{{- else if semverCompare ">=1.14-0" $gitVersion -}}
+apiVersion: networking.k8s.io/v1beta1
+{{- else -}}
+apiVersion: extensions/v1beta1
+{{- end }}
+kind: Ingress
+metadata:
+  name: {{ $fullName }}-{{ $index }}
+  labels:
+    {{- $labels | nindent 4 }}
+  {{- with .annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- if .tls }}
+  tls:
+    {{- range .tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            pathType: {{ .pathType }}
+            backend:
+              service:
+                {{- if .serviceName }}
+                name: {{ .serviceName }}
+                {{- else }}
+                name: {{ $fullName }}
+                {{- end }}
+                port:
+                  {{- if .servicePort }}
+                  {{- .servicePort | toYaml | nindent 18 }}
+                  {{- else }}
+                  {{- /* Default to the chart Service port (service.port, 8080 when unset) instead of a fixed 80. */}}
+                  number: {{ $.Values.service.port | default 8080 }}
+                  {{- end }}
+            {{- if .extendedOptions }}
+            {{- .extendedOptions | toYaml | nindent 14 }}
+            {{- end }}
+          {{- end }}
+    {{- end }}
+{{- end }}
+{{- end }}
diff --git a/huggingface-model/templates/pdb.yaml b/huggingface-model/templates/pdb.yaml
new file mode 100644
index 0000000..bfe31fc
--- /dev/null
+++ b/huggingface-model/templates/pdb.yaml
@@ -0,0 +1,19 @@
+{{- if .Values.pdb.create }}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "huggingface-model.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "huggingface-model.labels" . | nindent 4 }}
+spec:
+  {{- if .Values.pdb.minAvailable }}
+  minAvailable: {{ .Values.pdb.minAvailable }}
+  {{- end }}
+  {{- if .Values.pdb.maxUnavailable }}
+  maxUnavailable: {{ .Values.pdb.maxUnavailable }}
+  {{- end }}
+  selector:
+    matchLabels:
+      pdbEnabled: {{ include "huggingface-model.fullname" . }}
+{{- end }}
diff --git a/huggingface-model/templates/pvc.yaml b/huggingface-model/templates/pvc.yaml
new file mode 100644
index 0000000..838a9be
--- /dev/null
+++ b/huggingface-model/templates/pvc.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "huggingface-model.fullname" . 
}} +spec: + accessModes: {{ .Values.persistence.accessModes | toYaml | nindent 4 }} + storageClassName: {{ .Values.persistence.storageClassName }} + resources: + requests: + storage: {{ .Values.persistence.storage }} diff --git a/huggingface-model/templates/rbac.yaml b/huggingface-model/templates/rbac.yaml new file mode 100644 index 0000000..4993d47 --- /dev/null +++ b/huggingface-model/templates/rbac.yaml @@ -0,0 +1,23 @@ +{{- if .Values.serviceAccount.create -}} +{{- if .Values.serviceAccount.role -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ default (include "huggingface-model.fullname" .) .Values.serviceAccount.role.name }} +rules: {{ .Values.serviceAccount.role.rules | toYaml | nindent 2 }} +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ default (include "huggingface-model.fullname" .) .Values.serviceAccount.role.name }}-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ default (include "huggingface-model.fullname" .) .Values.serviceAccount.role.name }} +subjects: + - kind: ServiceAccount + name: {{ include "huggingface-model.serviceAccountName" . }} + namespace: {{ .Release.Namespace | quote }} +{{- end }} +{{- end }} diff --git a/huggingface-model/templates/service.yaml b/huggingface-model/templates/service.yaml new file mode 100644 index 0000000..85e0e23 --- /dev/null +++ b/huggingface-model/templates/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "huggingface-model.fullname" . }} + labels: + {{- include "huggingface-model.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type | default "ClusterIP" }} + ports: + - name: http + port: {{ .Values.service.port | default 8080 }} + targetPort: {{ include "huggingface-model.containerPort" . }} + selector: + {{- include "huggingface-model.selectorLabels" . 
| nindent 4 }} diff --git a/huggingface-model/templates/serviceaccount.yaml b/huggingface-model/templates/serviceaccount.yaml new file mode 100644 index 0000000..5ce1220 --- /dev/null +++ b/huggingface-model/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "huggingface-model.serviceAccountName" . }} + labels: + {{- include "huggingface-model.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/huggingface-model/values.yaml b/huggingface-model/values.yaml new file mode 100644 index 0000000..e8fa0e8 --- /dev/null +++ b/huggingface-model/values.yaml @@ -0,0 +1,267 @@ +## Huggingface model +## @section Model +## ref: https://huggingface.co/models +## @param model.organization Models' company name on huggingface, required! +## @param model.name Models' name on huggingface, required! +## e.g. to deploy model https://huggingface.co/segmind/SSD-1B use configuration below: +## organization: segmind +## name: SSD-1B +## +model: + organization: "" + name: "" + +## Init configuration. By default, init clone model from huggingface git. +## The another way is to upload model to s3 bucket to reduce init delay and external traffic. +## @param init.s3.enabled Turn on/off s3 data source Default: disabled +## @param init.s3.bucketURL Full s3 URL included path to model's folder +## +init: + s3: + enabled: false + bucketURL: s3://k8s-model-zephyr/llm/deployment/segmind/SSD-1B + +## huggingface block configure running text-generation-launcher internal port and additional arguments +## @param huggingface.containerPort Deployment/StatefulSet ContainerPort, optional +## +huggingface: + containerPort: 8080 + ## @param huggingface.args Additional arg for text-generation-launcher optional + ## e.g. 
+ ## args: + ## - "--quantize" + ## - "bitsandbytes" + ## - "--num-shard" + ## - "1" + ## + args: [] + +## @section Global +## @param replicaCount Deployment/StatefulSet replicaCount +## +replicaCount: 1 + +## @param kind Resource kind [allowed values: deployment/StatefulSet, optional] +## +kind: deployment + +## Huggingface image +## @param image.repo Huggingface image repo +## @param image.tag Huggingface image version +## @param image.pullPolicy Huggingface image pull policy +## ref: https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy +## +image: + repo: ghcr.io/huggingface/text-generation-inference + tag: "latest" + pullPolicy: IfNotPresent + +## @param imagePullSecrets May need if used private repo as a cache for image ghcr.io/huggingface/text-generation-inference +# +imagePullSecrets: [] + +## @param nameOverride String to partially override common.names.name +## +nameOverride: "" + +## @param fullnameOverride String to fully override common.names.fullname +## +fullnameOverride: "" + +## Persistence parameters +## ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ +## @param persistence.accessModes PVC accessModes +## @param persistence.storageClassName Kubernetes storageClass name +## @param persistence.storage Volume size +## +persistence: + accessModes: + - ReadWriteOnce + storageClassName: gp2 + storage: 100Gi + +## Service parameters +## ref: https://kubernetes.io/docs/concepts/services-networking/ +## @param service.port Service port, default 8080 +## @param service.type Service type, default ClusterIP +## +service: + port: 8080 + type: "ClusterIP" + +## ServiceAccount parameters +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ +## @param serviceAccount.create Enable/disable service account, default enabled +## @param serviceAccount.role Kubernetes role configuration, default nil +## +serviceAccount: + create: true + role: {} +# rules: +# - apiGroups: +# - "" +# 
resources: +# - endpoints +# - pods +# - nodes +# - services +# verbs: +# - get +# - list + +## @param podAnnotations Annotations for model pods +## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +## +podAnnotations: {} + +## Configure Pods Security Context +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod +## @param securityContext Set pod's Security Context fsGroup +## +securityContext: {} +# capabilities: +# drop: +# - ALL +# readOnlyRootFilesystem: true +# runAsNonRoot: true +# runAsUser: 1000 + +## @param extraEnvVars Array with extra environment variables to add to main pod +## e.g: +## extraEnvVars: +## - name: FOO +## value: "bar" +## +extraEnvVars: [] + +## Configure the ingresses resources list that allows you to access the model API +## @param ingresses.enabled Enable/disable ingress(es) for model API, default disabled +## +ingresses: + enabled: false + ## ingresses list + ## ref: https://kubernetes.io/docs/concepts/services-networking/ingress/ + ## @param ingresses.configs List of ingresses configs + ## e.g. 
+ ## configs: + ## - annotations: + ## cert-manager.io/cluster-issuer: "letsencrypt-http" + ## hosts: + ## - host: api.model.example.com + ## paths: + ## - path: / + ## pathType: Prefix + ## tls: + ## - hosts: + ## - api.model.example.com + ## secretName: huggingface-model + ## + configs: [] + + +## @param livenessProbe Configure extra options for model liveness probe +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes +## +livenessProbe: {} +# failureThreshold: 4 +# httpGet: +# path: / +# initialDelaySeconds: 1 +# periodSeconds: 5 +# successThreshold: 1 +# timeoutSeconds: 3 + +## @param readinessProbe Configure extra options for model readiness probe +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes +## +readinessProbe: {} +# failureThreshold: 3 +# httpGet: +# path: / +# initialDelaySeconds: 1 +# periodSeconds: 3 +# successThreshold: 2 +# timeoutSeconds: 2 + +## @param startupProbe Configure extra options for model startup probe +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes +## +startupProbe: {} +# failureThreshold: 10 +# httpGet: +# path: / +# initialDelaySeconds: 15 +# periodSeconds: 5 +# successThreshold: 1 +# timeoutSeconds: 3 + +## PodDisruptionBudget configuration +pdb: + ## @param pdb.create Specifies whether a PodDisruptionBudget should be created + ## + create: false + ## @param pdb.minAvailable Min number of pods that must still be available after the eviction + ## + minAvailable: 1 + ## @param pdb.maxUnavailable Max number of pods that can be unavailable after the eviction + ## + maxUnavailable: "" + +## Model container's resource requests and limits +## ref: https://kubernetes.io/docs/user-guide/compute-resources/ +## @param resources.limits.nvidia.com/gpu The required option by text-generation-launcher +## @param 
resources.requests.cpu The requested CPU minimal recommended value +## @param resources.requests.memory The requested memory minimal recommended size +## +resources: + requests: + cpu: "3" + memory: "10Gi" + limits: + nvidia.com/gpu: 1 + +## @param extraVolumes Optionally specify extra list of additional volumes for models' pods +## e.g. +## - hostPath: +## path: /opt/model/logs +## type: DirectoryOrCreate +## name: logging +## +extraVolumes: [] + +## @param extraVolumeMounts Optionally specify extra list of additional volumeMounts for models' container +## e.g. +## - mountPath: /opt/model/logs +## name: logging +## +extraVolumeMounts: [] + +## Model Autoscaling configuration +## ref: https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/ +## @param autoscaling.enabled Enable Horizontal POD autoscaling for model +## @param autoscaling.minReplicas Minimum number of model replicas +## @param autoscaling.maxReplicas Maximum number of model replicas +## @param autoscaling.targetCPU Target CPU utilization percentage +## @param autoscaling.targetMemory Target Memory utilization percentage +## +autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 5 + targetCPU: 50 + targetMemory: 50 + +## @param affinity Affinity for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +## NOTE: podAffinityPreset, podAntiAffinityPreset, and nodeAffinityPreset will be ignored when it's set +## +affinity: {} +## @param nodeSelector Node labels for pod assignment +## ref: https://kubernetes.io/docs/user-guide/node-selection/ +## +nodeSelector: {} +## @param tolerations Tolerations for pod assignment +## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: [] +