From f26ee13f0b2c429524a483d233917ff37cb58a17 Mon Sep 17 00:00:00 2001
From: tylertitsworth
Date: Thu, 26 Sep 2024 15:24:44 -0700
Subject: [PATCH] add gaudi2 support

Signed-off-by: tylertitsworth
---
Review notes (commentary below the three-dash line; not part of the commit):

* Line structure of this patch was restored; every hunk keeps its original
  old/new line counts, so the hunk headers and the diffstat are unchanged.
* values.yaml: pvc.storageClassName now defaults to "" instead of the bare
  scalar nil. YAML has no nil keyword, so the old default was the string
  "nil" (the README row showed type string, default "nil"); the
  `if .Values.pvc.storageClassName` guard in templates/pvc.yaml was therefore
  always true and rendered `storageClassName: nil`. With "" the guard is
  false and the field is omitted. The README row is updated to match.
* templates/deploy.yaml: the securityContext action now uses `{{-` like the
  neighbouring resources action, so no whitespace-only line is rendered.
* The `index` blob ids are carried over from the original patch and were not
  recomputed for the three files touched above.

 workflows/charts/tgi/Chart.yaml            |  2 +-
 workflows/charts/tgi/README.md             |  9 ++--
 workflows/charts/tgi/templates/NOTES.txt   |  1 -
 workflows/charts/tgi/templates/deploy.yaml | 48 +++++++++++++---------
 workflows/charts/tgi/templates/pvc.yaml    | 29 +++++++++++++
 workflows/charts/tgi/templates/secret.yaml |  1 +
 workflows/charts/tgi/values.yaml           | 13 +++---
 7 files changed, 72 insertions(+), 31 deletions(-)
 create mode 100644 workflows/charts/tgi/templates/pvc.yaml

diff --git a/workflows/charts/tgi/Chart.yaml b/workflows/charts/tgi/Chart.yaml
index 761d8b0c..7d0cd59a 100644
--- a/workflows/charts/tgi/Chart.yaml
+++ b/workflows/charts/tgi/Chart.yaml
@@ -33,7 +33,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.2.0
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
diff --git a/workflows/charts/tgi/README.md b/workflows/charts/tgi/README.md
index 7c020fe1..93df5830 100644
--- a/workflows/charts/tgi/README.md
+++ b/workflows/charts/tgi/README.md
@@ -7,23 +7,24 @@ For more information about how to use Huggingface text-generation-inference with
 > [!TIP]
 > For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
 
-![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
+![Version: 0.2.0](https://img.shields.io/badge/Version-0.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
 
 ## Values
 
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| deploy.configMapName | string | `"intel-proxy-config"` | ConfigMap of Environment Variables |
+| deploy.configMap | object | `{"enabled":true,"name":"tgi-config"}` | ConfigMap of Environment Variables |
 | deploy.image | string | `"ghcr.io/huggingface/text-generation-inference:latest-intel"` | Intel TGI Image |
-| deploy.model | string | `"HuggingFaceTB/SmolLM-135M"` | Model to be loaded |
-| deploy.quantize | string | `""` | Enable Quantization (ex: bitsandbytes-nf4) |
 | deploy.replicaCount | int | `1` | Number of pods |
 | deploy.resources | object | `{"limits":{"cpu":"4000m","gpu.intel.com/i915":1},"requests":{"cpu":"1000m","memory":"1Gi"}}` | Resource configuration |
 | deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration |
 | fullnameOverride | string | `""` | Full qualified Domain Name |
 | ingress | object | `{"annotations":{},"className":"","enabled":false,"hosts":[{"host":"chart-example.local","paths":[{"path":"/","pathType":"ImplementationSpecific"}]}],"tls":[]}` | Ingress configuration |
 | nameOverride | string | `""` | Name of the serving service |
+| pvc.size | string | `"15Gi"` |  |
+| pvc.storageClassName | string | `""` |  |
 | secret.encodedToken | string | `""` | Base64 Encoded Huggingface Hub API Token |
+| securityContext | object | `{}` | Security Context Configuration |
 | service | object | `{"port":80,"type":"NodePort"}` | Service configuration |
 
 ----------------------------------------------
diff --git a/workflows/charts/tgi/templates/NOTES.txt b/workflows/charts/tgi/templates/NOTES.txt
index fc906eb6..edf83d37 100644
--- a/workflows/charts/tgi/templates/NOTES.txt
+++ b/workflows/charts/tgi/templates/NOTES.txt
@@ -17,6 +17,5 @@
 {{- else if contains "ClusterIP" .Values.service.type }}
   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
   export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
-  echo "Visit http://127.0.0.1:8080 to use your application"
   kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
 {{- end }}
diff --git a/workflows/charts/tgi/templates/deploy.yaml b/workflows/charts/tgi/templates/deploy.yaml
index 6c5a5bd5..0036de34 100644
--- a/workflows/charts/tgi/templates/deploy.yaml
+++ b/workflows/charts/tgi/templates/deploy.yaml
@@ -28,54 +28,62 @@ spec:
       labels:
         {{- include "tgi.selectorLabels" . | nindent 8 }}
     spec:
-      securityContext:
-        fsGroup: 1000
-        runAsUser: 1000
+      hostIPC: true
       containers:
         - name: {{ .Chart.Name }}
           args:
-            - '--model-id'
-            - {{ .Values.deploy.model | quote }}
-            {{- if index .Values.deploy.resources.limits "gpu.intel.com/i915" }}
-            - '--num-shard'
-            - {{ index .Values.deploy.resources.limits "gpu.intel.com/i915" | quote }}
-            {{- end }}
             - '-p'
             - {{ .Values.service.port | quote }}
-            {{- if .Values.quantize }}
-            - '--quantize'
-            - {{ .Values.deploy.quantize | quote }}
-            {{- end }}
             - '--cuda-graphs=0'
           envFrom:
+            {{- if eq .Values.deploy.configMap.enabled true }}
             - configMapRef:
-                name: {{ .Values.deploy.configMapName }}
+                name: {{ .Values.deploy.configMap.name }}
+            {{- end }}
             - secretRef:
                 name: {{ .Release.Name }}-hf-token
-          env:
-            - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
-              value: /data/numba_cache
+          # env:
+          #   - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
+          #     value: /data/numba_cache
           image: {{ .Values.deploy.image }}
           livenessProbe:
-            httpGet:
-              path: /health
-              port: {{ .Values.service.port }}
+            failureThreshold: 10
             initialDelaySeconds: 5
             periodSeconds: 5
+            tcpSocket:
+              port: http
+          readinessProbe:
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          startupProbe:
+            failureThreshold: 120
+            initialDelaySeconds: 20
+            periodSeconds: 5
+            tcpSocket:
+              port: http
           ports:
             - name: http
               containerPort: {{ .Values.service.port }}
               protocol: TCP
           resources:
             {{- toYaml .Values.deploy.resources | nindent 12 }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
           volumeMounts:
             - mountPath: /dev/shm
               name: dshm
             - mountPath: /data
               name: hf-data
+            - mountPath: /tmp
+              name: tmp
       volumes:
         - name: dshm
           emptyDir:
             medium: Memory
         - name: hf-data
+          persistentVolumeClaim:
+            claimName: {{ include "tgi.fullname" . }}-cache
+        - name: tmp
           emptyDir: {}
diff --git a/workflows/charts/tgi/templates/pvc.yaml b/workflows/charts/tgi/templates/pvc.yaml
new file mode 100644
index 00000000..dd5c4041
--- /dev/null
+++ b/workflows/charts/tgi/templates/pvc.yaml
@@ -0,0 +1,29 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "tgi.fullname" . }}-cache
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+spec:
+  {{- if .Values.pvc.storageClassName }}
+  storageClassName: {{ .Values.pvc.storageClassName }}
+  {{- end }}
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: {{ .Values.pvc.size }}
diff --git a/workflows/charts/tgi/templates/secret.yaml b/workflows/charts/tgi/templates/secret.yaml
index 0507543e..6503253e 100644
--- a/workflows/charts/tgi/templates/secret.yaml
+++ b/workflows/charts/tgi/templates/secret.yaml
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 {{- $name := .Values.secret.encodedToken | required ".Values.secret.encodedToken is required in Base64 Format." -}}
+---
 apiVersion: v1
 kind: Secret
 metadata:
diff --git a/workflows/charts/tgi/values.yaml b/workflows/charts/tgi/values.yaml
index 7d2434cc..ef4f4828 100644
--- a/workflows/charts/tgi/values.yaml
+++ b/workflows/charts/tgi/values.yaml
@@ -18,13 +18,11 @@ nameOverride: ""
 fullnameOverride: ""
 deploy:
   # -- ConfigMap of Environment Variables
-  configMapName: intel-proxy-config
+  configMap:
+    enabled: true
+    name: tgi-config
   # -- Intel TGI Image
   image: ghcr.io/huggingface/text-generation-inference:latest-intel
-  # -- Model to be loaded
-  model: HuggingFaceTB/SmolLM-135M
-  # -- Enable Quantization (ex: bitsandbytes-nf4)
-  quantize: ""
   # -- Number of pods
   replicaCount: 1
   # -- Resource configuration
@@ -39,6 +37,8 @@ deploy:
     requests:
       cpu: 1000m
       memory: "1Gi"
+# -- Security Context Configuration
+securityContext: {}
 secret:
   # -- Base64 Encoded Huggingface Hub API Token
   encodedToken: ""
@@ -62,3 +62,6 @@ ingress:
   #  - secretName: chart-example-tls
   #    hosts:
   #      - chart-example.local
+pvc:
+  storageClassName: ""
+  size: 15Gi