Skip to content

Commit

Permalink
Added configs for Neon VLLM service
Browse files Browse the repository at this point in the history
  • Loading branch information
kirgrim committed Oct 28, 2024
1 parent e21d057 commit 46e59c4
Show file tree
Hide file tree
Showing 13 changed files with 189 additions and 0 deletions.
38 changes: 38 additions & 0 deletions neon_diana_utils/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def make_llm_bot_config():
configuration['llm_bots']['gemini'] = persona_config['gemini']
if click.confirm("Configure Claude Personas?"):
configuration['llm_bots']['claude'] = persona_config['claude']
if click.confirm("Configure VLLM Personas?"):
configuration['llm_bots']['vllm'] = persona_config['vllm']
return configuration


Expand Down Expand Up @@ -193,6 +195,40 @@ def make_keys_config(write_config: bool,
config_confirmed = \
click.confirm("Is this configuration correct?")

# Interactive collection of the Neon VLLM service configuration.
# Empty dict when the user declines, so downstream config writing is safe.
vllm_config = dict()
if click.confirm("Configure VLLM?"):
    config_confirmed = False
    # Re-prompt the whole section until the user approves the echoed result
    while not config_confirmed:
        vllm_api_url = click.prompt("VLLM API URL", type=str)
        vllm_connection_key = click.prompt("VLLM Connection Key", type=str)
        vllm_hf_token = click.prompt("Hugging Face Auth Token", type=str)
        # FIX: added a space after "NeonLLM." — the original implicit string
        # concatenation produced "You are NeonLLM.You are trying..." with the
        # two sentences fused together in the default persona prompt.
        vllm_role = click.prompt("VLLM Role",
                                 type=str,
                                 default="You are NeonLLM. "
                                         "You are trying to give a short "
                                         "answer in less than 40 words.")
        vllm_context_depth = click.prompt("VLLM Context depth",
                                          type=int,
                                          default=4)
        vllm_max_tokens = click.prompt("Maximum tokens in responses",
                                       type=int,
                                       default=100)
        vllm_num_parallel_processes = click.prompt(
            "Number of parallel processes", type=int, default=2)
        # Assemble the section consumed as `LLM_VLLM` in the keys config
        vllm_config = {
            "api_url": vllm_api_url,
            "key": vllm_connection_key,
            "hf_token": vllm_hf_token,
            "role": vllm_role,
            "context_depth": vllm_context_depth,
            "max_tokens": vllm_max_tokens,
            "num_parallel_processes": vllm_num_parallel_processes
        }
        # Echo the collected values for visual verification before accepting
        click.echo(pformat(vllm_config))
        config_confirmed = \
            click.confirm("Is this configuration correct?")

fastchat_config = dict()
if click.confirm("Configure FastChat LLM?"):
config_confirmed = False
Expand Down Expand Up @@ -300,6 +336,7 @@ def make_keys_config(write_config: bool,
"emails": email_config,
"track_my_brands": brands_config},
"LLM_CHAT_GPT": chatgpt_config,
"LLM_VLLM": vllm_config,
"LLM_FASTCHAT": fastchat_config,
"LLM_PALM2": palm2_config,
"LLM_GEMINI": gemini_config,
Expand Down Expand Up @@ -544,6 +581,7 @@ def _get_unconfigured_mq_backend_services(config: dict) -> Set[str]:
'keys.emails': 'neon-email-proxy',
'keys.track_my_brands': 'neon-brands-service',
'LLM_CHAT_GPT': 'neon-llm-chatgpt',
'LLM_VLLM': 'neon-llm-vllm',
'LLM_FASTCHAT': 'neon-llm-fastchat',
'LLM_CLAUDE': 'neon-llm-claude',
'LLM_GEMINI': 'neon-llm-gemini',
Expand Down
14 changes: 14 additions & 0 deletions neon_diana_utils/docker/backend/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,20 @@ services:
- XDG_DATA_HOME=/xdg/data
- XDG_CACHE_HOME=/xdg/cache
- XDG_STATE_HOME=/xdg/state
neon-llm-vllm:
container_name: neon-llm-vllm
image: ghcr.io/neongeckocom/neon-llm-vllm:${MQ_IMAGE_TAG}
depends_on:
- neon-rabbitmq
networks:
- diana-backend
volumes:
- xdg:/xdg:rw
environment:
- XDG_CONFIG_HOME=/xdg/config
- XDG_DATA_HOME=/xdg/data
- XDG_CACHE_HOME=/xdg/cache
- XDG_STATE_HOME=/xdg/state
neon-llm-fastchat:
container_name: neon-llm-fastchat
image: ghcr.io/neongeckocom/neon-llm-fastchat:${MQ_IMAGE_TAG}
Expand Down
4 changes: 4 additions & 0 deletions neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ dependencies:
alias: neon-llm-chatgpt
version: 0.0.7
repository: file://../../mq/neon-llm-chatgpt
- name: neon-llm-vllm
alias: neon-llm-vllm
version: 0.0.7
repository: file://../../mq/neon-llm-vllm
- name: neon-llm-fastchat
alias: neon-llm-fastchat
version: 0.0.6
Expand Down
3 changes: 3 additions & 0 deletions neon_diana_utils/helm_charts/backend/mq-services/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ neon-script-parser:
neon-llm-chatgpt:
image:
tag: *tag
neon-llm-vllm:
image:
tag: *tag
neon-llm-fastchat:
image:
tag: *tag
23 changes: 23 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
12 changes: 12 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: v2
name: neon-llm-vllm
description: Deploy an LLM proxy for Neon VLLM Service

type: application
version: 0.0.7
appVersion: "1.0.1a25"

dependencies:
- name: base-mq
version: 0.0.10
repository: file://../../base/base-mq
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when set, otherwise the chart name; truncated to
63 characters with any trailing "-" removed so it is a valid DNS label.
*/}}
{{- define "neon-llm-service.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
.Values.fullnameOverride, when set, takes precedence over both.
*/}}
{{- define "neon-llm-service.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
"+" is replaced with "_" because label values may not contain "+".
*/}}
{{- define "neon-llm-service.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels applied to every rendered resource; includes the selector
labels plus chart/version/managed-by metadata labels.
*/}}
{{- define "neon-llm-service.labels" -}}
helm.sh/chart: {{ include "neon-llm-service.chart" . }}
{{ include "neon-llm-service.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels — the stable subset used by Deployment selectors and
Service selectors; must not change between upgrades.
*/}}
{{- define "neon-llm-service.selectorLabels" -}}
app.kubernetes.io/name: {{ include "neon-llm-service.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use.
Defaults to the fullname when a service account is being created, otherwise
to the namespace "default" account, unless explicitly overridden.
*/}}
{{- define "neon-llm-service.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "neon-llm-service.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "base-mq.deployment" .}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "base-mq.service" .}}
11 changes: 11 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Default values for the neon-llm-vllm chart (consumed by the base-mq
# library chart this chart depends on).
serviceName: backend-llm-vllm  # Kubernetes Service / workload name
replicaCount: 1
configSecret: diana-config     # secret holding the Diana backend config
image:
  repository: ghcr.io/neongeckocom/neon-llm-vllm
  pullPolicy: Always
  tag: dev                     # overridden to the release tag by mq-services values
resources:
  requests:
    memory: "1Gi"
    cpu: "1.0"
7 changes: 7 additions & 0 deletions neon_diana_utils/templates/llm_personas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ chat_gpt:
You are an AI bot that specializes in counseling and mental health support.
Provide guidance on assessments, therapy sessions, crisis intervention, goal setting, referrals, advocacy, education, documentation, and adherence to ethical standards, fostering positive changes in clients' lives.
You're attempting to provide a concise response within a 40-word limit.
vllm:
- name: neon
description: |
You are NeonLLM.
- name: patent
description: |
You are PatentLLM.
palm2:
- name: travel_mate
description: |
Expand Down
2 changes: 2 additions & 0 deletions neon_diana_utils/templates/mq_user_mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ neon_libretranslate:
- mq-libre-translate
neon_llm_chatgpt:
- neon_llm_chat_gpt
neon_llm_vllm:
- neon_llm_vllm
neon_llm_fastchat:
- neon_llm_fastchat
neon_llm_claude:
Expand Down
11 changes: 11 additions & 0 deletions neon_diana_utils/templates/rmq_backend_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ users:
tags:
- backend
- llm
- name: "neon_llm_vllm"
password:
tags:
- backend
- llm
- vllm
- name: "neon_llm_fastchat"
password:
tags:
Expand Down Expand Up @@ -109,6 +115,11 @@ permissions:
configure: "chat_gpt_.*"
write: "chat_gpt_.*|amq\\.default"
read: "chat_gpt_.*"
- user: "neon_llm_vllm"
vhost: "/llm"
configure: "vllm_.*"
write: "vllm_.*|amq\\.default"
read: "vllm_.*"
- user: "neon_llm_fastchat"
vhost: "/llm"
configure: "fastchat_.*"
Expand Down

0 comments on commit 46e59c4

Please sign in to comment.