Skip to content

Commit f26ee13

Browse files
author
tylertitsworth
committed
add gaudi2 support
Signed-off-by: tylertitsworth <tyler.titsworth@intel.com>
1 parent e186489 commit f26ee13

File tree

7 files changed

+72
-31
lines changed

7 files changed

+72
-31
lines changed

workflows/charts/tgi/Chart.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ type: application
3333
# This is the chart version. This version number should be incremented each time you make changes
3434
# to the chart and its templates, including the app version.
3535
# Versions are expected to follow Semantic Versioning (https://semver.org/)
36-
version: 0.1.0
36+
version: 0.2.0
3737

3838
# This is the version number of the application being deployed. This version number should be
3939
# incremented each time you make changes to the application. Versions are not expected to

workflows/charts/tgi/README.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,24 @@ For more information about how to use Huggingface text-generation-inference with
77
> [!TIP]
88
> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
99
10-
![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
10+
![Version: 0.2.0](https://img.shields.io/badge/Version-0.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
1111

1212
## Values
1313

1414
| Key | Type | Default | Description |
1515
|-----|------|---------|-------------|
16-
| deploy.configMapName | string | `"intel-proxy-config"` | ConfigMap of Environment Variables |
16+
| deploy.configMap | object | `{"enabled":true,"name":"tgi-config"}` | ConfigMap of Environment Variables |
1717
| deploy.image | string | `"ghcr.io/huggingface/text-generation-inference:latest-intel"` | Intel TGI Image |
18-
| deploy.model | string | `"HuggingFaceTB/SmolLM-135M"` | Model to be loaded |
19-
| deploy.quantize | string | `""` | Enable Quantization (ex: bitsandbytes-nf4) |
2018
| deploy.replicaCount | int | `1` | Number of pods |
2119
| deploy.resources | object | `{"limits":{"cpu":"4000m","gpu.intel.com/i915":1},"requests":{"cpu":"1000m","memory":"1Gi"}}` | Resource configuration |
2220
| deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration |
2321
| fullnameOverride | string | `""` | Fully qualified Domain Name |
2422
| ingress | object | `{"annotations":{},"className":"","enabled":false,"hosts":[{"host":"chart-example.local","paths":[{"path":"/","pathType":"ImplementationSpecific"}]}],"tls":[]}` | Ingress configuration |
2523
| nameOverride | string | `""` | Name of the serving service |
24+
| pvc.size | string | `"15Gi"` | Storage size requested by the cache PersistentVolumeClaim mounted at `/data` |
25+
| pvc.storageClassName | string | `"nil"` | StorageClass name used by the cache PersistentVolumeClaim |
2626
| secret.encodedToken | string | `""` | Base64 Encoded Huggingface Hub API Token |
27+
| securityContext | object | `{}` | Security Context Configuration |
2728
| service | object | `{"port":80,"type":"NodePort"}` | Service configuration |
2829

2930
----------------------------------------------

workflows/charts/tgi/templates/NOTES.txt

-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,5 @@
1717
{{- else if contains "ClusterIP" .Values.service.type }}
1818
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
1919
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
20-
echo "Visit http://127.0.0.1:8080 to use your application"
2120
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
2221
{{- end }}

workflows/charts/tgi/templates/deploy.yaml

+28-20
Original file line numberDiff line numberDiff line change
@@ -28,54 +28,62 @@ spec:
2828
labels:
2929
{{- include "tgi.selectorLabels" . | nindent 8 }}
3030
spec:
31-
securityContext:
32-
fsGroup: 1000
33-
runAsUser: 1000
31+
hostIPC: true
3432
containers:
3533
- name: {{ .Chart.Name }}
3634
args:
37-
- '--model-id'
38-
- {{ .Values.deploy.model | quote }}
39-
{{- if index .Values.deploy.resources.limits "gpu.intel.com/i915" }}
40-
- '--num-shard'
41-
- {{ index .Values.deploy.resources.limits "gpu.intel.com/i915" | quote }}
42-
{{- end }}
4335
- '-p'
4436
- {{ .Values.service.port | quote }}
45-
{{- if .Values.quantize }}
46-
- '--quantize'
47-
- {{ .Values.deploy.quantize | quote }}
48-
{{- end }}
4937
- '--cuda-graphs=0'
5038
envFrom:
39+
{{- if eq .Values.deploy.configMap.enabled true }}
5140
- configMapRef:
52-
name: {{ .Values.deploy.configMapName }}
41+
name: {{ .Values.deploy.configMap.name }}
42+
{{- end }}
5343
- secretRef:
5444
name: {{ .Release.Name }}-hf-token
55-
env:
56-
- name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
57-
value: /data/numba_cache
45+
# env:
46+
# - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
47+
# value: /data/numba_cache
5848
image: {{ .Values.deploy.image }}
5949
livenessProbe:
60-
httpGet:
61-
path: /health
62-
port: {{ .Values.service.port }}
50+
failureThreshold: 10
6351
initialDelaySeconds: 5
6452
periodSeconds: 5
53+
tcpSocket:
54+
port: http
55+
readinessProbe:
56+
initialDelaySeconds: 5
57+
periodSeconds: 5
58+
tcpSocket:
59+
port: http
60+
startupProbe:
61+
failureThreshold: 120
62+
initialDelaySeconds: 20
63+
periodSeconds: 5
64+
tcpSocket:
65+
port: http
6566
ports:
6667
- name: http
6768
containerPort: {{ .Values.service.port }}
6869
protocol: TCP
6970
resources:
7071
{{- toYaml .Values.deploy.resources | nindent 12 }}
72+
securityContext:
73+
{{ toYaml .Values.securityContext | nindent 12 }}
7174
volumeMounts:
7275
- mountPath: /dev/shm
7376
name: dshm
7477
- mountPath: /data
7578
name: hf-data
79+
- mountPath: /tmp
80+
name: tmp
7681
volumes:
7782
- name: dshm
7883
emptyDir:
7984
medium: Memory
8085
- name: hf-data
86+
persistentVolumeClaim:
87+
claimName: {{ include "tgi.fullname" . }}-cache
88+
- name: tmp
8189
emptyDir: {}
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: v1
16+
kind: PersistentVolumeClaim
17+
metadata:
18+
name: {{ include "tgi.fullname" . }}-cache
19+
labels:
20+
{{- include "tgi.labels" . | nindent 4 }}
21+
spec:
22+
{{- if .Values.pvc.storageClassName }}
23+
storageClassName: {{ .Values.pvc.storageClassName }}
24+
{{- end }}
25+
accessModes:
26+
- ReadWriteMany
27+
resources:
28+
requests:
29+
storage: {{ .Values.pvc.size }}

workflows/charts/tgi/templates/secret.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
{{- $name := .Values.secret.encodedToken | required ".Values.secret.encodedToken is required in Base64 Format." -}}
16+
---
1617
apiVersion: v1
1718
kind: Secret
1819
metadata:

workflows/charts/tgi/values.yaml

+8-5
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@ nameOverride: ""
1818
fullnameOverride: ""
1919
deploy:
2020
# -- ConfigMap of Environment Variables
21-
configMapName: intel-proxy-config
21+
configMap:
22+
enabled: true
23+
name: tgi-config
2224
# -- Intel TGI Image
2325
image: ghcr.io/huggingface/text-generation-inference:latest-intel
24-
# -- Model to be loaded
25-
model: HuggingFaceTB/SmolLM-135M
26-
# -- Enable Quantization (ex: bitsandbytes-nf4)
27-
quantize: ""
2826
# -- Number of pods
2927
replicaCount: 1
3028
# -- Resource configuration
@@ -39,6 +37,8 @@ deploy:
3937
requests:
4038
cpu: 1000m
4139
memory: "1Gi"
40+
# -- Security Context Configuration
41+
securityContext: {}
4242
secret:
4343
# -- Base64 Encoded Huggingface Hub API Token
4444
encodedToken: ""
@@ -62,3 +62,6 @@ ingress:
6262
# - secretName: chart-example-tls
6363
# hosts:
6464
# - chart-example.local
65+
pvc:
66+
storageClassName: nil
67+
size: 15Gi

0 commit comments

Comments
 (0)