Skip to content

Commit b62a7a6

Browse files
author
tylertitsworth
committed
update for support with gaudi2
Signed-off-by: tylertitsworth <tyler.titsworth@intel.com>
1 parent 2759b79 commit b62a7a6

File tree

6 files changed

+72
-24
lines changed

6 files changed

+72
-24
lines changed

workflows/charts/tgi/Chart.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ type: application
3333
# This is the chart version. This version number should be incremented each time you make changes
3434
# to the chart and its templates, including the app version.
3535
# Versions are expected to follow Semantic Versioning (https://semver.org/)
36-
version: 0.1.0
36+
version: 0.2.0
3737

3838
# This is the version number of the application being deployed. This version number should be
3939
# incremented each time you make changes to the application. Versions are not expected to

workflows/charts/tgi/README.md

+5-2
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ For more information about how to use Huggingface text-generation-inference with
77
> [!TIP]
88
> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
99
10-
![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
10+
![Version: 0.2.0](https://img.shields.io/badge/Version-0.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
1111

1212
## Values
1313

1414
| Key | Type | Default | Description |
1515
|-----|------|---------|-------------|
16-
| deploy.configMapName | string | `"intel-proxy-config"` | ConfigMap of Environment Variables |
16+
| deploy.configMap | object | `{"enabled":true,"name":"intel-proxy-config"}` | ConfigMap of Environment Variables |
1717
| deploy.image | string | `"ghcr.io/huggingface/text-generation-inference:latest-intel"` | Intel TGI Image |
1818
| deploy.model | string | `"HuggingFaceTB/SmolLM-135M"` | Model to be loaded |
1919
| deploy.quantize | string | `""` | Enable Quantization (ex: bitsandbytes-nf4) |
@@ -23,7 +23,10 @@ For more information about how to use Huggingface text-generation-inference with
2323
| fullnameOverride | string | `""` | Full qualified Domain Name |
2424
| ingress | object | `{"annotations":{},"className":"","enabled":false,"hosts":[{"host":"chart-example.local","paths":[{"path":"/","pathType":"ImplementationSpecific"}]}],"tls":[]}` | Ingress configuration |
2525
| nameOverride | string | `""` | Name of the serving service |
26+
| pvc.size | string | `"15Gi"` | Storage size requested by the `-cache` PersistentVolumeClaim mounted at `/data` |
27+
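| pvc.storageClassName | string | `"nil"` | StorageClass name for the `-cache` PersistentVolumeClaim; the field is omitted from the claim when this value is empty |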
2628
| secret.encodedToken | string | `""` | Base64 Encoded Huggingface Hub API Token |
29+
| securityContext | object | `{}` | Security Context Configuration |
2730
| service | object | `{"port":80,"type":"NodePort"}` | Service configuration |
2831

2932
----------------------------------------------

workflows/charts/tgi/templates/deploy.yaml

+28-20
Original file line numberDiff line numberDiff line change
@@ -28,54 +28,62 @@ spec:
2828
labels:
2929
{{- include "tgi.selectorLabels" . | nindent 8 }}
3030
spec:
31-
securityContext:
32-
fsGroup: 1000
33-
runAsUser: 1000
31+
hostIPC: true
3432
containers:
3533
- name: {{ .Chart.Name }}
3634
args:
37-
- '--model-id'
38-
- {{ .Values.deploy.model | quote }}
39-
{{- if index .Values.deploy.resources.limits "gpu.intel.com/i915" }}
40-
- '--num-shard'
41-
- {{ index .Values.deploy.resources.limits "gpu.intel.com/i915" | quote }}
42-
{{- end }}
4335
- '-p'
4436
- {{ .Values.service.port | quote }}
45-
{{- if .Values.quantize }}
46-
- '--quantize'
47-
- {{ .Values.deploy.quantize | quote }}
48-
{{- end }}
4937
- '--cuda-graphs=0'
5038
envFrom:
39+
{{- if eq .Values.deploy.configMap.enabled true }}
5140
- configMapRef:
52-
name: {{ .Values.deploy.configMapName }}
41+
name: {{ .Values.deploy.configMap.name }}
42+
{{- end }}
5343
- secretRef:
5444
name: {{ .Release.Name }}-hf-token
55-
env:
56-
- name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
57-
value: /data/numba_cache
45+
# env:
46+
# - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
47+
# value: /data/numba_cache
5848
image: {{ .Values.deploy.image }}
5949
livenessProbe:
60-
httpGet:
61-
path: /health
62-
port: {{ .Values.service.port }}
50+
failureThreshold: 10
6351
initialDelaySeconds: 5
6452
periodSeconds: 5
53+
tcpSocket:
54+
port: http
55+
readinessProbe:
56+
initialDelaySeconds: 5
57+
periodSeconds: 5
58+
tcpSocket:
59+
port: http
60+
startupProbe:
61+
failureThreshold: 120
62+
initialDelaySeconds: 20
63+
periodSeconds: 5
64+
tcpSocket:
65+
port: http
6566
ports:
6667
- name: http
6768
containerPort: {{ .Values.service.port }}
6869
protocol: TCP
6970
resources:
7071
{{- toYaml .Values.deploy.resources | nindent 12 }}
72+
securityContext:
73+
{{- /* Trim the preceding newline, like the resources include above; nindent supplies its own */}}
{{- toYaml .Values.securityContext | nindent 12 }}
7174
volumeMounts:
7275
- mountPath: /dev/shm
7376
name: dshm
7477
- mountPath: /data
7578
name: hf-data
79+
- mountPath: /tmp
80+
name: tmp
7681
volumes:
7782
- name: dshm
7883
emptyDir:
7984
medium: Memory
8085
- name: hf-data
86+
persistentVolumeClaim:
87+
claimName: {{ include "tgi.fullname" . }}-cache
88+
- name: tmp
8189
emptyDir: {}
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: v1
16+
kind: PersistentVolumeClaim
17+
metadata:
18+
name: {{ include "tgi.fullname" . }}-cache
19+
labels:
20+
{{- include "tgi.labels" . | nindent 4 }}
21+
spec:
22+
{{- if .Values.pvc.storageClassName }}
23+
storageClassName: {{ .Values.pvc.storageClassName }}
24+
{{- end }}
25+
accessModes:
26+
- ReadWriteMany
27+
resources:
28+
requests:
29+
storage: {{ .Values.pvc.size }}

workflows/charts/tgi/templates/secret.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
{{- $name := .Values.secret.encodedToken | required ".Values.secret.encodedToken is required in Base64 Format." -}}
16+
---
1617
apiVersion: v1
1718
kind: Secret
1819
metadata:

workflows/charts/tgi/values.yaml

+8-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ nameOverride: ""
1818
fullnameOverride: ""
1919
deploy:
2020
# -- ConfigMap of Environment Variables
21-
configMapName: intel-proxy-config
21+
configMap:
22+
enabled: true
23+
name: intel-proxy-config
2224
# -- Intel TGI Image
2325
image: ghcr.io/huggingface/text-generation-inference:latest-intel
2426
# -- Model to be loaded
@@ -39,6 +41,8 @@ deploy:
3941
requests:
4042
cpu: 1000m
4143
memory: "1Gi"
44+
# -- Security Context Configuration
45+
securityContext: {}
4246
secret:
4347
# -- Base64 Encoded Huggingface Hub API Token
4448
encodedToken: ""
@@ -62,3 +66,6 @@ ingress:
6266
# - secretName: chart-example-tls
6367
# hosts:
6468
# - chart-example.local
69+
pvc:
70+
storageClassName: ""  # empty: the PVC template omits the field; a bare nil is the string "nil" in YAML
71+
size: 15Gi

0 commit comments

Comments
 (0)