Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added configs for Neon VLLM service #79

Merged
merged 4 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions neon_diana_utils/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ def make_llm_bot_config():
configuration['llm_bots']['gemini'] = persona_config['gemini']
if click.confirm("Configure Claude Personas?"):
configuration['llm_bots']['claude'] = persona_config['claude']
if click.confirm("Configure VLLM Personas?"):
configuration['llm_bots']['vllm'] = persona_config['vllm']
return configuration


Expand Down Expand Up @@ -195,6 +197,38 @@ def make_keys_config(write_config: bool,
config_confirmed = \
click.confirm("Is this configuration correct?")

vllm_config = dict()
if click.confirm("Configure VLLM?"):
config_confirmed = False
while not config_confirmed:
vllm_api_url = click.prompt("VLLM API URL", type=str)
vllm_connection_key = click.prompt("VLLM Connection Key", type=str)
vllm_hf_token = click.prompt("Hugging Face Auth Token", type=str)
vllm_role = click.prompt("VLLM Role",
type=str,
default="You are NeonLLM.")
vllm_context_depth = click.prompt("VLLM Context depth",
type=int,
default=4)
vllm_max_tokens = click.prompt("Maximum tokens in responses",
type=int,
default=512)
vllm_num_parallel_processes = click.prompt("Number of parallel processes",
type=int,
default=2)
vllm_config = {
"api_url": vllm_api_url,
"key": vllm_connection_key,
"hf_token": vllm_hf_token,
"role": vllm_role,
"context_depth": vllm_context_depth,
"max_tokens": vllm_max_tokens,
"num_parallel_processes": vllm_num_parallel_processes
}
click.echo(pformat(vllm_config))
config_confirmed = \
click.confirm("Is this configuration correct?")

fastchat_config = dict()
if click.confirm("Configure FastChat LLM?"):
config_confirmed = False
Expand Down Expand Up @@ -323,6 +357,7 @@ def make_keys_config(write_config: bool,
"emails": email_config,
"track_my_brands": brands_config},
"LLM_CHAT_GPT": chatgpt_config,
"LLM_VLLM": vllm_config,
"LLM_FASTCHAT": fastchat_config,
"LLM_PALM2": palm2_config,
"LLM_GEMINI": gemini_config,
Expand Down Expand Up @@ -572,6 +607,7 @@ def _get_unconfigured_mq_backend_services(config: dict) -> Set[str]:
'keys.emails': 'neon-email-proxy',
'keys.track_my_brands': 'neon-brands-service',
'LLM_CHAT_GPT': 'neon-llm-chatgpt',
'LLM_VLLM': 'neon-llm-vllm',
'LLM_FASTCHAT': 'neon-llm-fastchat',
'LLM_CLAUDE': 'neon-llm-claude',
'LLM_GEMINI': 'neon-llm-gemini',
Expand Down
14 changes: 14 additions & 0 deletions neon_diana_utils/docker/backend/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,20 @@ services:
- XDG_DATA_HOME=/xdg/data
- XDG_CACHE_HOME=/xdg/cache
- XDG_STATE_HOME=/xdg/state
neon-llm-vllm:
container_name: neon-llm-vllm
image: ghcr.io/neongeckocom/neon-llm-vllm:${MQ_IMAGE_TAG}
depends_on:
- neon-rabbitmq
networks:
- diana-backend
volumes:
- xdg:/xdg:rw
environment:
- XDG_CONFIG_HOME=/xdg/config
- XDG_DATA_HOME=/xdg/data
- XDG_CACHE_HOME=/xdg/cache
- XDG_STATE_HOME=/xdg/state
neon-llm-fastchat:
container_name: neon-llm-fastchat
image: ghcr.io/neongeckocom/neon-llm-fastchat:${MQ_IMAGE_TAG}
Expand Down
4 changes: 2 additions & 2 deletions neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.25
version: 0.1.27

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand All @@ -35,5 +35,5 @@ dependencies:
version: 0.0.14
repository: file://../http-services
- name: diana-mq
version: 0.0.17
version: 0.0.18
repository: file://../mq-services
6 changes: 5 additions & 1 deletion neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: diana-mq
description: Deploy DIANA MQ Services

type: application
version: 0.0.17
version: 0.0.18
appVersion: "1.0.1a26"
dependencies:
- name: neon-api-proxy
Expand All @@ -30,6 +30,10 @@ dependencies:
alias: neon-llm-chatgpt
version: 0.0.7
repository: file://../../mq/neon-llm-chatgpt
- name: neon-llm-vllm
alias: neon-llm-vllm
version: 0.0.1
repository: file://../../mq/neon-llm-vllm
- name: neon-llm-fastchat
alias: neon-llm-fastchat
version: 0.0.6
Expand Down
3 changes: 3 additions & 0 deletions neon_diana_utils/helm_charts/backend/mq-services/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ neon-script-parser:
neon-llm-chatgpt:
image:
tag: *tag
neon-llm-vllm:
image:
tag: *tag
neon-llm-fastchat:
image:
tag: *tag
23 changes: 23 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
12 changes: 12 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: v2
name: neon-llm-vllm
description: Deploy an LLM proxy for Neon VLLM Service

type: application
# Chart version; the diana-mq chart declares its neon-llm-vllm dependency at this same version (0.0.1).
version: 0.0.1
# NOTE(review): the diana-mq chart in this change uses appVersion "1.0.1a26" - confirm "1.0.1a25" is intended here.
appVersion: "1.0.1a25"

dependencies:
# Presumably supplies the "base-mq.deployment" and "base-mq.service" templates that this chart's templates include - verify against base-mq.
- name: base-mq
version: 0.0.10
repository: file://../../base/base-mq
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when it is set and falls back to .Chart.Name otherwise.
The result is truncated to 63 characters and a trailing "-" is removed.
NOTE(review): the template prefix is "neon-llm-service" although this file appears to belong to the
"neon-llm-vllm" chart; Helm template names are global across charts, so confirm that any sibling chart
defining the same names uses identical bodies.
*/}}
{{- define "neon-llm-service.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.

Resolution order:
  1. .Values.fullnameOverride, when it is set.
  2. .Release.Name on its own, when it already contains the name
     (.Values.nameOverride if set, otherwise .Chart.Name).
  3. "<release name>-<name>" otherwise.
Every branch truncates to 63 characters and removes a trailing "-".
*/}}
{{- define "neon-llm-service.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
Renders "<chart name>-<chart version>", replaces every "+" with "_", truncates the result to
63 characters and removes a trailing "-".
NOTE(review): the "+" replacement is presumably there because "+" (SemVer build metadata) is not
a valid character in a Kubernetes label value - confirm.
*/}}
{{- define "neon-llm-service.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Emits, in order:
  - helm.sh/chart, from the "neon-llm-service.chart" template
  - the selector labels, from the "neon-llm-service.selectorLabels" template
  - app.kubernetes.io/version (quoted), only when .Chart.AppVersion is set
  - app.kubernetes.io/managed-by, from .Release.Service
The output is not indented here; callers are expected to indent it as needed.
*/}}
{{- define "neon-llm-service.labels" -}}
helm.sh/chart: {{ include "neon-llm-service.chart" . }}
{{ include "neon-llm-service.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
Emits app.kubernetes.io/name (from the "neon-llm-service.name" template) and
app.kubernetes.io/instance (from .Release.Name).
This template is also included by "neon-llm-service.labels".
*/}}
{{- define "neon-llm-service.selectorLabels" -}}
app.kubernetes.io/name: {{ include "neon-llm-service.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
When .Values.serviceAccount.create is truthy: .Values.serviceAccount.name, falling back to the
"neon-llm-service.fullname" template when no name is set.
Otherwise: .Values.serviceAccount.name, falling back to the literal "default".
NOTE(review): the values.yaml shown for this chart defines no "serviceAccount" key; confirm that
it is supplied elsewhere before including this template, since .Values.serviceAccount.create
cannot be resolved without it.
*/}}
{{- define "neon-llm-service.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "neon-llm-service.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "base-mq.deployment" .}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "base-mq.service" .}}
11 changes: 11 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Default values for the neon-llm-vllm chart.
serviceName: backend-llm-vllm
replicaCount: 1
# NOTE(review): presumably the name of the Secret holding the Diana configuration - confirm against the base-mq templates.
configSecret: diana-config
image:
repository: ghcr.io/neongeckocom/neon-llm-vllm
pullPolicy: Always
# Default tag; the diana-mq values.yaml sets neon-llm-vllm.image.tag to its shared *tag alias.
tag: dev
# Only resource requests are declared here; no limits are set in this file.
resources:
requests:
memory: "1Gi"
cpu: "1.0"
2 changes: 1 addition & 1 deletion neon_diana_utils/templates/backend/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ appVersion: "1.0.1a5"

dependencies:
- name: backend
version: 0.1.25
version: 0.1.27
repository: https://neongeckocom.github.io/neon-diana-utils
7 changes: 7 additions & 0 deletions neon_diana_utils/templates/llm_personas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ chat_gpt:
You are an AI bot that specializes in counseling and mental health support.
Provide guidance on assessments, therapy sessions, crisis intervention, goal setting, referrals, advocacy, education, documentation, and adherence to ethical standards, fostering positive changes in clients' lives.
You're attempting to provide a concise response within a 40-word limit.
vllm:
- name: neon
description: |
You are NeonLLM.
- name: patent
description: |
You are PatentLLM.
palm2:
- name: travel_mate
description: |
Expand Down
2 changes: 2 additions & 0 deletions neon_diana_utils/templates/mq_user_mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ neon_libretranslate:
- mq-libre-translate
neon_llm_chatgpt:
- neon_llm_chat_gpt
neon_llm_vllm:
- neon_llm_vllm
neon_llm_fastchat:
- neon_llm_fastchat
neon_llm_claude:
Expand Down
11 changes: 11 additions & 0 deletions neon_diana_utils/templates/rmq_backend_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ users:
tags:
- backend
- llm
- name: "neon_llm_vllm"
password:
tags:
- backend
- llm
- vllm
- name: "neon_llm_fastchat"
password:
tags:
Expand Down Expand Up @@ -109,6 +115,11 @@ permissions:
configure: "chat_gpt_.*"
write: "chat_gpt_.*|amq\\.default"
read: "chat_gpt_.*"
- user: "neon_llm_vllm"
vhost: "/llm"
configure: "vllm_.*"
write: "vllm_.*|amq\\.default"
read: "vllm_.*"
- user: "neon_llm_fastchat"
vhost: "/llm"
configure: "fastchat_.*"
Expand Down
4 changes: 3 additions & 1 deletion tests/test_diana_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ def test_get_unconfigured_backend_services(self):
all_configured = {'keys': {'api_services': {'configured': True},
'emails': {'configured': True},
'track_my_brands': True},
'LLM_VLLM': {'config': False},
'LLM_CHAT_GPT': {'config': False},
'LLM_CLAUDE': {'': ''},
'LLM_PALM2': 'enabled',
Expand All @@ -304,7 +305,8 @@ def test_get_unconfigured_backend_services(self):
self.assertEqual(disabled, {'neon-api-proxy', 'neon-brands-service',
'neon-email-proxy', 'neon-llm-chatgpt',
'neon-llm-fastchat', 'neon-llm-claude',
'neon-llm-palm', 'neon-llm-gemini'})
'neon-llm-palm', 'neon-llm-gemini',
'neon-llm-vllm'})

def test_get_optional_http_backend(self):
from neon_diana_utils.configuration import _get_optional_http_backend
Expand Down
7 changes: 7 additions & 0 deletions tests/test_rabbitmq.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@
"write": ".*",
"read": "chat_gpt_input"
},
{
"user": "neon_llm_vllm",
"vhost": "/llm",
"configure": ".*",
"write": ".*",
"read": "vllm_input"
},
{
"user": "neon_llm_fastchat",
"vhost": "/llm",
Expand Down
Loading