From 14eb728b4d97fc917c24fdc36ffba182f43d6eb2 Mon Sep 17 00:00:00 2001 From: kgrim Date: Mon, 28 Oct 2024 18:13:17 +0100 Subject: [PATCH 1/4] Added configs for Neon VLLM service --- neon_diana_utils/configuration.py | 38 ++++++++++++ .../docker/backend/docker-compose.yml | 14 +++++ .../backend/mq-services/Chart.yaml | 4 ++ .../backend/mq-services/values.yaml | 3 + .../helm_charts/mq/neon-llm-vllm/.helmignore | 23 +++++++ .../helm_charts/mq/neon-llm-vllm/Chart.yaml | 12 ++++ .../mq/neon-llm-vllm/templates/_helpers.tpl | 62 +++++++++++++++++++ .../neon-llm-vllm/templates/deployment.yaml | 1 + .../mq/neon-llm-vllm/templates/service.yaml | 1 + .../helm_charts/mq/neon-llm-vllm/values.yaml | 11 ++++ neon_diana_utils/templates/llm_personas.yml | 7 +++ .../templates/mq_user_mapping.yml | 2 + .../templates/rmq_backend_config.yml | 11 ++++ 13 files changed, 189 insertions(+) create mode 100644 neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore create mode 100644 neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml create mode 100644 neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl create mode 100644 neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml create mode 100644 neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml create mode 100644 neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml diff --git a/neon_diana_utils/configuration.py b/neon_diana_utils/configuration.py index f05f4f88..7bc42028 100644 --- a/neon_diana_utils/configuration.py +++ b/neon_diana_utils/configuration.py @@ -95,6 +95,8 @@ def make_llm_bot_config(): configuration['llm_bots']['gemini'] = persona_config['gemini'] if click.confirm("Configure Claude Personas?"): configuration['llm_bots']['claude'] = persona_config['claude'] + if click.confirm("Configure VLLM Personas?"): + configuration['llm_bots']['vllm'] = persona_config['vllm'] return configuration @@ -195,6 +197,40 @@ def make_keys_config(write_config: bool, config_confirmed = \ click.confirm("Is this configuration correct?") + vllm_config = dict() + if click.confirm("Configure VLLM?"): + config_confirmed = False + while not config_confirmed: + vllm_api_url = click.prompt("VLLM API URL", type=str) + vllm_connection_key = click.prompt("VLLM Connection Key", type=str) + vllm_hf_token = click.prompt("Hugging Face Auth Token", type=str) + vllm_role = click.prompt("VLLM Role", + type=str, + default="You are NeonLLM." + "You are trying to give a short " + "answer in less than 40 words.") + vllm_context_depth = click.prompt("VLLM Context depth", + type=int, + default=4) + vllm_max_tokens = click.prompt("Maximum tokens in responses", + type=int, + default=100) + vllm_num_parallel_processes = click.prompt("Number of parallel processes", + type=int, + default=2) + vllm_config = { + "api_url": vllm_api_url, + "key": vllm_connection_key, + "hf_token": vllm_hf_token, + "role": vllm_role, + "context_depth": vllm_context_depth, + "max_tokens": vllm_max_tokens, + "num_parallel_processes": vllm_num_parallel_processes + } + click.echo(pformat(vllm_config)) + config_confirmed = \ + click.confirm("Is this configuration correct?") + fastchat_config = dict() if click.confirm("Configure FastChat LLM?"): config_confirmed = False @@ -323,6 +359,7 @@ def make_keys_config(write_config: bool, "emails": email_config, "track_my_brands": brands_config}, "LLM_CHAT_GPT": chatgpt_config, + "LLM_VLLM": vllm_config, "LLM_FASTCHAT": fastchat_config, "LLM_PALM2": palm2_config, "LLM_GEMINI": gemini_config, @@ -572,6 +609,7 @@ def _get_unconfigured_mq_backend_services(config: dict) -> Set[str]: 'keys.emails': 'neon-email-proxy', 'keys.track_my_brands': 'neon-brands-service', 'LLM_CHAT_GPT': 'neon-llm-chatgpt', + 'LLM_VLLM': 'neon-llm-vllm', 'LLM_FASTCHAT': 'neon-llm-fastchat', 'LLM_CLAUDE': 'neon-llm-claude', 'LLM_GEMINI': 'neon-llm-gemini', diff --git a/neon_diana_utils/docker/backend/docker-compose.yml b/neon_diana_utils/docker/backend/docker-compose.yml index 2fc6c7ee..089e7b9f 100644 --- a/neon_diana_utils/docker/backend/docker-compose.yml +++ b/neon_diana_utils/docker/backend/docker-compose.yml @@ -64,6 +64,20 @@ services: - XDG_DATA_HOME=/xdg/data - XDG_CACHE_HOME=/xdg/cache - XDG_STATE_HOME=/xdg/state + neon-llm-vllm: + container_name: neon-llm-vllm + image: ghcr.io/neongeckocom/neon-llm-vllm:${MQ_IMAGE_TAG} + depends_on: + - neon-rabbitmq + networks: + - diana-backend + volumes: + - xdg:/xdg:rw + environment: + - XDG_CONFIG_HOME=/xdg/config + - XDG_DATA_HOME=/xdg/data + - XDG_CACHE_HOME=/xdg/cache + - XDG_STATE_HOME=/xdg/state neon-llm-fastchat: container_name: neon-llm-fastchat image: ghcr.io/neongeckocom/neon-llm-fastchat:${MQ_IMAGE_TAG} diff --git a/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml b/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml index 73c935a7..372a979a 100644 --- a/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml +++ b/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml @@ -30,6 +30,10 @@ dependencies: alias: neon-llm-chatgpt version: 0.0.7 repository: file://../../mq/neon-llm-chatgpt + - name: neon-llm-vllm + alias: neon-llm-vllm + version: 0.0.7 + repository: file://../../mq/neon-llm-vllm - name: neon-llm-fastchat alias: neon-llm-fastchat version: 0.0.6 diff --git a/neon_diana_utils/helm_charts/backend/mq-services/values.yaml b/neon_diana_utils/helm_charts/backend/mq-services/values.yaml index 14e586c5..e971fb07 100644 --- a/neon_diana_utils/helm_charts/backend/mq-services/values.yaml +++ b/neon_diana_utils/helm_charts/backend/mq-services/values.yaml @@ -18,6 +18,9 @@ neon-script-parser: neon-llm-chatgpt: image: tag: *tag +neon-llm-vllm: + image: + tag: *tag neon-llm-fastchat: image: tag: *tag \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml new file mode 100644 index 00000000..8f14f9f7 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: neon-llm-vllm +description: Deploy an LLM proxy for Neon VLLM Service + +type: application +version: 0.0.7 +appVersion: "1.0.1a25" + +dependencies: + - name: base-mq + version: 0.0.10 + repository: file://../../base/base-mq \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl new file mode 100644 index 00000000..8afd6ed2 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "neon-llm-service.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "neon-llm-service.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "neon-llm-service.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "neon-llm-service.labels" -}} +helm.sh/chart: {{ include "neon-llm-service.chart" . }} +{{ include "neon-llm-service.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "neon-llm-service.selectorLabels" -}} +app.kubernetes.io/name: {{ include "neon-llm-service.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "neon-llm-service.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "neon-llm-service.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml new file mode 100644 index 00000000..41547ae7 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml @@ -0,0 +1 @@ +{{- include "base-mq.deployment" .}} \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml new file mode 100644 index 00000000..80730915 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml @@ -0,0 +1 @@ +{{- include "base-mq.service" .}} \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml new file mode 100644 index 00000000..7279f549 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml @@ -0,0 +1,11 @@ +serviceName: backend-llm-vllm +replicaCount: 1 +configSecret: diana-config +image: + repository: ghcr.io/neongeckocom/neon-llm-vllm + pullPolicy: Always + tag: dev +resources: + requests: + memory: "1Gi" + cpu: "1.0" diff --git a/neon_diana_utils/templates/llm_personas.yml b/neon_diana_utils/templates/llm_personas.yml index f4126809..09bb5914 100644 --- a/neon_diana_utils/templates/llm_personas.yml +++ b/neon_diana_utils/templates/llm_personas.yml @@ -26,6 +26,13 @@ chat_gpt: You are an AI bot that specializes in counseling and mental health support. Provide guidance on assessments, therapy sessions, crisis intervention, goal setting, referrals, advocacy, education, documentation, and adherence to ethical standards, fostering positive changes in clients' lives. You're attempting to provide a concise response within a 40-word limit. +vllm: + - name: neon + description: | + You are NeonLLM. + - name: patent + description: | + You are PatentLLM. palm2: - name: travel_mate description: | diff --git a/neon_diana_utils/templates/mq_user_mapping.yml b/neon_diana_utils/templates/mq_user_mapping.yml index 06e28bc0..8c22404f 100644 --- a/neon_diana_utils/templates/mq_user_mapping.yml +++ b/neon_diana_utils/templates/mq_user_mapping.yml @@ -13,6 +13,8 @@ neon_libretranslate: - mq-libre-translate neon_llm_chatgpt: - neon_llm_chat_gpt +neon_llm_vllm: + - neon_llm_vllm neon_llm_fastchat: - neon_llm_fastchat neon_llm_claude: diff --git a/neon_diana_utils/templates/rmq_backend_config.yml b/neon_diana_utils/templates/rmq_backend_config.yml index c86369d7..1266490c 100644 --- a/neon_diana_utils/templates/rmq_backend_config.yml +++ b/neon_diana_utils/templates/rmq_backend_config.yml @@ -39,6 +39,12 @@ users: tags: - backend - llm + - name: "neon_llm_vllm" + password: + tags: + - backend + - llm + - vllm - name: "neon_llm_fastchat" password: tags: @@ -109,6 +115,11 @@ permissions: configure: "chat_gpt_.*" write: "chat_gpt_.*|amq\\.default" read: "chat_gpt_.*" + - user: "neon_llm_vllm" + vhost: "/llm" + configure: "vllm_.*" + write: "vllm_.*|amq\\.default" + read: "vllm_.*" - user: "neon_llm_fastchat" vhost: "/llm" configure: "fastchat_.*" From 96cb828e35ebf221d816886783df77aa84f0cf73 Mon Sep 17 00:00:00 2001 From: kgrim Date: Mon, 28 Oct 2024 18:26:35 +0100 Subject: [PATCH 2/4] Fixed unittest issues and default values --- neon_diana_utils/configuration.py | 8 +++----- tests/test_diana_utils.py | 4 +++- tests/test_rabbitmq.json | 7 +++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/neon_diana_utils/configuration.py b/neon_diana_utils/configuration.py index 7bc42028..587dc544 100644 --- a/neon_diana_utils/configuration.py +++ b/neon_diana_utils/configuration.py @@ -206,15 +206,13 @@ def make_keys_config(write_config: bool, vllm_hf_token = click.prompt("Hugging Face Auth Token", type=str) vllm_role = click.prompt("VLLM Role", type=str, - default="You are NeonLLM." - "You are trying to give a short " - "answer in less than 40 words.") + default="You are NeonLLM.") vllm_context_depth = click.prompt("VLLM Context depth", type=int, default=4) vllm_max_tokens = click.prompt("Maximum tokens in responses", - type=int, - default=100) + type=int, + default=512) vllm_num_parallel_processes = click.prompt("Number of parallel processes", type=int, default=2) diff --git a/tests/test_diana_utils.py b/tests/test_diana_utils.py index 1870ae93..383156e8 100644 --- a/tests/test_diana_utils.py +++ b/tests/test_diana_utils.py @@ -289,6 +289,7 @@ def test_get_unconfigured_backend_services(self): all_configured = {'keys': {'api_services': {'configured': True}, 'emails': {'configured': True}, 'track_my_brands': True}, + 'LLM_VLLM': {'config': False}, 'LLM_CHAT_GPT': {'config': False}, 'LLM_CLAUDE': {'': ''}, 'LLM_PALM2': 'enabled', @@ -304,7 +305,8 @@ def test_get_unconfigured_backend_services(self): self.assertEqual(disabled, {'neon-api-proxy', 'neon-brands-service', 'neon-email-proxy', 'neon-llm-chatgpt', 'neon-llm-fastchat', 'neon-llm-claude', - 'neon-llm-palm', 'neon-llm-gemini'}) + 'neon-llm-palm', 'neon-llm-gemini', + 'neon-llm-vllm'}) def test_get_optional_http_backend(self): from neon_diana_utils.configuration import _get_optional_http_backend diff --git a/tests/test_rabbitmq.json b/tests/test_rabbitmq.json index 787675fb..4c4fec38 100644 --- a/tests/test_rabbitmq.json +++ b/tests/test_rabbitmq.json @@ -131,6 +131,13 @@ "write": ".*", "read": "chat_gpt_input" }, + { + "user": "neon_llm_vllm", + "vhost": "/llm", + "configure": ".*", + "write": ".*", + "read": "vllm_input" + }, { "user": "neon_llm_fastchat", "vhost": "/llm", From 3a609137e8799e7372396a865ba128d9942079e7 Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Fri, 1 Nov 2024 09:36:57 -0700 Subject: [PATCH 3/4] Update chart versions and push changes to helm repository --- neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml | 4 ++-- neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml | 4 ++-- neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml | 2 +- neon_diana_utils/templates/backend/Chart.yaml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml b/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml index a3ca22eb..e831e137 100644 --- a/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml +++ b/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.25 +version: 0.1.26 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to @@ -35,5 +35,5 @@ dependencies: version: 0.0.14 repository: file://../http-services - name: diana-mq - version: 0.0.17 + version: 0.0.18 repository: file://../mq-services \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml b/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml index 372a979a..64cad0e4 100644 --- a/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml +++ b/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml @@ -3,7 +3,7 @@ name: diana-mq description: Deploy DIANA MQ Services type: application -version: 0.0.17 +version: 0.0.18 appVersion: "1.0.1a26" dependencies: - name: neon-api-proxy @@ -32,7 +32,7 @@ dependencies: repository: file://../../mq/neon-llm-chatgpt - name: neon-llm-vllm alias: neon-llm-vllm - version: 0.0.7 + version: 0.0.1 repository: file://../../mq/neon-llm-vllm - name: neon-llm-fastchat alias: neon-llm-fastchat diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml index 8f14f9f7..1619593b 100644 --- a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml @@ -3,7 +3,7 @@ name: neon-llm-vllm description: Deploy an LLM proxy for Neon VLLM Service type: application -version: 0.0.7 +version: 0.0.1 appVersion: "1.0.1a25" dependencies: diff --git a/neon_diana_utils/templates/backend/Chart.yaml b/neon_diana_utils/templates/backend/Chart.yaml index 993770c8..030c640f 100644 --- a/neon_diana_utils/templates/backend/Chart.yaml +++ b/neon_diana_utils/templates/backend/Chart.yaml @@ -8,5 +8,5 @@ appVersion: "1.0.1a5" dependencies: - name: backend - version: 0.1.25 + version: 0.1.26 repository: https://neongeckocom.github.io/neon-diana-utils \ No newline at end of file From 2d6f08b42ae1551597d3d3cebce73d975c6738dd Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Fri, 1 Nov 2024 11:15:36 -0700 Subject: [PATCH 4/4] Update chart versions to resolve uploaded chart version conflict --- neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml | 2 +- neon_diana_utils/templates/backend/Chart.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml b/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml index e831e137..9ce1550c 100644 --- a/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml +++ b/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.26 +version: 0.1.27 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/neon_diana_utils/templates/backend/Chart.yaml b/neon_diana_utils/templates/backend/Chart.yaml index 030c640f..9b91d7b4 100644 --- a/neon_diana_utils/templates/backend/Chart.yaml +++ b/neon_diana_utils/templates/backend/Chart.yaml @@ -8,5 +8,5 @@ appVersion: "1.0.1a5" dependencies: - name: backend - version: 0.1.26 + version: 0.1.27 repository: https://neongeckocom.github.io/neon-diana-utils \ No newline at end of file