diff --git a/neon_diana_utils/configuration.py b/neon_diana_utils/configuration.py index f05f4f88..587dc544 100644 --- a/neon_diana_utils/configuration.py +++ b/neon_diana_utils/configuration.py @@ -95,6 +95,8 @@ def make_llm_bot_config(): configuration['llm_bots']['gemini'] = persona_config['gemini'] if click.confirm("Configure Claude Personas?"): configuration['llm_bots']['claude'] = persona_config['claude'] + if click.confirm("Configure VLLM Personas?"): + configuration['llm_bots']['vllm'] = persona_config['vllm'] return configuration @@ -195,6 +197,38 @@ def make_keys_config(write_config: bool, config_confirmed = \ click.confirm("Is this configuration correct?") + vllm_config = dict() + if click.confirm("Configure VLLM?"): + config_confirmed = False + while not config_confirmed: + vllm_api_url = click.prompt("VLLM API URL", type=str) + vllm_connection_key = click.prompt("VLLM Connection Key", type=str) + vllm_hf_token = click.prompt("Hugging Face Auth Token", type=str) + vllm_role = click.prompt("VLLM Role", + type=str, + default="You are NeonLLM.") + vllm_context_depth = click.prompt("VLLM Context depth", + type=int, + default=4) + vllm_max_tokens = click.prompt("Maximum tokens in responses", + type=int, + default=512) + vllm_num_parallel_processes = click.prompt("Number of parallel processes", + type=int, + default=2) + vllm_config = { + "api_url": vllm_api_url, + "key": vllm_connection_key, + "hf_token": vllm_hf_token, + "role": vllm_role, + "context_depth": vllm_context_depth, + "max_tokens": vllm_max_tokens, + "num_parallel_processes": vllm_num_parallel_processes + } + click.echo(pformat(vllm_config)) + config_confirmed = \ + click.confirm("Is this configuration correct?") + fastchat_config = dict() if click.confirm("Configure FastChat LLM?"): config_confirmed = False @@ -323,6 +357,7 @@ def make_keys_config(write_config: bool, "emails": email_config, "track_my_brands": brands_config}, "LLM_CHAT_GPT": chatgpt_config, + "LLM_VLLM": vllm_config, "LLM_FASTCHAT": fastchat_config, "LLM_PALM2": palm2_config, "LLM_GEMINI": gemini_config, @@ -572,6 +607,7 @@ def _get_unconfigured_mq_backend_services(config: dict) -> Set[str]: 'keys.emails': 'neon-email-proxy', 'keys.track_my_brands': 'neon-brands-service', 'LLM_CHAT_GPT': 'neon-llm-chatgpt', + 'LLM_VLLM': 'neon-llm-vllm', 'LLM_FASTCHAT': 'neon-llm-fastchat', 'LLM_CLAUDE': 'neon-llm-claude', 'LLM_GEMINI': 'neon-llm-gemini', diff --git a/neon_diana_utils/docker/backend/docker-compose.yml b/neon_diana_utils/docker/backend/docker-compose.yml index 2fc6c7ee..089e7b9f 100644 --- a/neon_diana_utils/docker/backend/docker-compose.yml +++ b/neon_diana_utils/docker/backend/docker-compose.yml @@ -64,6 +64,20 @@ services: - XDG_DATA_HOME=/xdg/data - XDG_CACHE_HOME=/xdg/cache - XDG_STATE_HOME=/xdg/state + neon-llm-vllm: + container_name: neon-llm-vllm + image: ghcr.io/neongeckocom/neon-llm-vllm:${MQ_IMAGE_TAG} + depends_on: + - neon-rabbitmq + networks: + - diana-backend + volumes: + - xdg:/xdg:rw + environment: + - XDG_CONFIG_HOME=/xdg/config + - XDG_DATA_HOME=/xdg/data + - XDG_CACHE_HOME=/xdg/cache + - XDG_STATE_HOME=/xdg/state neon-llm-fastchat: container_name: neon-llm-fastchat image: ghcr.io/neongeckocom/neon-llm-fastchat:${MQ_IMAGE_TAG} diff --git a/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml b/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml index a3ca22eb..9ce1550c 100644 --- a/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml +++ b/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.25 +version: 0.1.27 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to @@ -35,5 +35,5 @@ dependencies: version: 0.0.14 repository: file://../http-services - name: diana-mq - version: 0.0.17 + version: 0.0.18 repository: file://../mq-services \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml b/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml index 73c935a7..64cad0e4 100644 --- a/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml +++ b/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml @@ -3,7 +3,7 @@ name: diana-mq description: Deploy DIANA MQ Services type: application -version: 0.0.17 +version: 0.0.18 appVersion: "1.0.1a26" dependencies: - name: neon-api-proxy @@ -30,6 +30,10 @@ dependencies: alias: neon-llm-chatgpt version: 0.0.7 repository: file://../../mq/neon-llm-chatgpt + - name: neon-llm-vllm + alias: neon-llm-vllm + version: 0.0.1 + repository: file://../../mq/neon-llm-vllm - name: neon-llm-fastchat alias: neon-llm-fastchat version: 0.0.6 diff --git a/neon_diana_utils/helm_charts/backend/mq-services/values.yaml b/neon_diana_utils/helm_charts/backend/mq-services/values.yaml index 14e586c5..e971fb07 100644 --- a/neon_diana_utils/helm_charts/backend/mq-services/values.yaml +++ b/neon_diana_utils/helm_charts/backend/mq-services/values.yaml @@ -18,6 +18,9 @@ neon-script-parser: neon-llm-chatgpt: image: tag: *tag +neon-llm-vllm: + image: + tag: *tag neon-llm-fastchat: image: tag: *tag \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml new file mode 100644 index 00000000..1619593b --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: neon-llm-vllm +description: Deploy an LLM proxy for Neon VLLM Service + +type: application +version: 0.0.1 +appVersion: "1.0.1a25" + +dependencies: + - name: base-mq + version: 0.0.10 + repository: file://../../base/base-mq \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl new file mode 100644 index 00000000..8afd6ed2 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "neon-llm-service.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "neon-llm-service.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "neon-llm-service.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "neon-llm-service.labels" -}} +helm.sh/chart: {{ include "neon-llm-service.chart" . }} +{{ include "neon-llm-service.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "neon-llm-service.selectorLabels" -}} +app.kubernetes.io/name: {{ include "neon-llm-service.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "neon-llm-service.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "neon-llm-service.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml new file mode 100644 index 00000000..41547ae7 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml @@ -0,0 +1 @@ +{{- include "base-mq.deployment" .}} \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml new file mode 100644 index 00000000..80730915 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml @@ -0,0 +1 @@ +{{- include "base-mq.service" .}} \ No newline at end of file diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml new file mode 100644 index 00000000..7279f549 --- /dev/null +++ b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml @@ -0,0 +1,11 @@ +serviceName: backend-llm-vllm +replicaCount: 1 +configSecret: diana-config +image: + repository: ghcr.io/neongeckocom/neon-llm-vllm + pullPolicy: Always + tag: dev +resources: + requests: + memory: "1Gi" + cpu: "1.0" diff --git a/neon_diana_utils/templates/backend/Chart.yaml b/neon_diana_utils/templates/backend/Chart.yaml index 993770c8..9b91d7b4 100644 --- a/neon_diana_utils/templates/backend/Chart.yaml +++ b/neon_diana_utils/templates/backend/Chart.yaml @@ -8,5 +8,5 @@ appVersion: "1.0.1a5" dependencies: - name: backend - version: 0.1.25 + version: 0.1.27 repository: https://neongeckocom.github.io/neon-diana-utils \ No newline at end of file diff --git a/neon_diana_utils/templates/llm_personas.yml b/neon_diana_utils/templates/llm_personas.yml index f4126809..09bb5914 100644 --- a/neon_diana_utils/templates/llm_personas.yml +++ b/neon_diana_utils/templates/llm_personas.yml @@ -26,6 +26,13 @@ chat_gpt: You are an AI bot that specializes in counseling and mental health support. Provide guidance on assessments, therapy sessions, crisis intervention, goal setting, referrals, advocacy, education, documentation, and adherence to ethical standards, fostering positive changes in clients' lives. You're attempting to provide a concise response within a 40-word limit. +vllm: + - name: neon + description: | + You are NeonLLM. + - name: patent + description: | + You are PatentLLM. palm2: - name: travel_mate description: | diff --git a/neon_diana_utils/templates/mq_user_mapping.yml b/neon_diana_utils/templates/mq_user_mapping.yml index 06e28bc0..8c22404f 100644 --- a/neon_diana_utils/templates/mq_user_mapping.yml +++ b/neon_diana_utils/templates/mq_user_mapping.yml @@ -13,6 +13,8 @@ neon_libretranslate: - mq-libre-translate neon_llm_chatgpt: - neon_llm_chat_gpt +neon_llm_vllm: + - neon_llm_vllm neon_llm_fastchat: - neon_llm_fastchat neon_llm_claude: diff --git a/neon_diana_utils/templates/rmq_backend_config.yml b/neon_diana_utils/templates/rmq_backend_config.yml index c86369d7..1266490c 100644 --- a/neon_diana_utils/templates/rmq_backend_config.yml +++ b/neon_diana_utils/templates/rmq_backend_config.yml @@ -39,6 +39,12 @@ users: tags: - backend - llm + - name: "neon_llm_vllm" + password: + tags: + - backend + - llm + - vllm - name: "neon_llm_fastchat" password: tags: @@ -109,6 +115,11 @@ permissions: configure: "chat_gpt_.*" write: "chat_gpt_.*|amq\\.default" read: "chat_gpt_.*" + - user: "neon_llm_vllm" + vhost: "/llm" + configure: "vllm_.*" + write: "vllm_.*|amq\\.default" + read: "vllm_.*" - user: "neon_llm_fastchat" vhost: "/llm" configure: "fastchat_.*" diff --git a/tests/test_diana_utils.py b/tests/test_diana_utils.py index 1870ae93..383156e8 100644 --- a/tests/test_diana_utils.py +++ b/tests/test_diana_utils.py @@ -289,6 +289,7 @@ def test_get_unconfigured_backend_services(self): all_configured = {'keys': {'api_services': {'configured': True}, 'emails': {'configured': True}, 'track_my_brands': True}, + 'LLM_VLLM': {'config': False}, 'LLM_CHAT_GPT': {'config': False}, 'LLM_CLAUDE': {'': ''}, 'LLM_PALM2': 'enabled', @@ -304,7 +305,8 @@ def test_get_unconfigured_backend_services(self): self.assertEqual(disabled, {'neon-api-proxy', 'neon-brands-service', 'neon-email-proxy', 'neon-llm-chatgpt', 'neon-llm-fastchat', 'neon-llm-claude', - 'neon-llm-palm', 'neon-llm-gemini'}) + 'neon-llm-palm', 'neon-llm-gemini', + 'neon-llm-vllm'}) def test_get_optional_http_backend(self): from neon_diana_utils.configuration import _get_optional_http_backend diff --git a/tests/test_rabbitmq.json b/tests/test_rabbitmq.json index 787675fb..4c4fec38 100644 --- a/tests/test_rabbitmq.json +++ b/tests/test_rabbitmq.json @@ -131,6 +131,13 @@ "write": ".*", "read": "chat_gpt_input" }, + { + "user": "neon_llm_vllm", + "vhost": "/llm", + "configure": ".*", + "write": ".*", + "read": "vllm_input" + }, { "user": "neon_llm_fastchat", "vhost": "/llm",