NeonGeckoCom · NeonDaniel · Nov 1, 2024 · Oct 28, 2024 · Oct 28, 2024 · Nov 1, 2024
diff --git a/neon_diana_utils/configuration.py b/neon_diana_utils/configuration.py
@@ -95,6 +95,8 @@ def make_llm_bot_config():
         configuration['llm_bots']['gemini'] = persona_config['gemini']
     if click.confirm("Configure Claude Personas?"):
         configuration['llm_bots']['claude'] = persona_config['claude']
+    if click.confirm("Configure VLLM Personas?"):
+        configuration['llm_bots']['vllm'] = persona_config['vllm']
     return configuration
 
 
@@ -195,6 +197,38 @@ def make_keys_config(write_config: bool,
             config_confirmed = \
                 click.confirm("Is this configuration correct?")
 
+    vllm_config = dict()  # stays empty unless the user opts in below
+    if click.confirm("Configure VLLM?"):
+        config_confirmed = False
+        while not config_confirmed:  # re-prompt until the echoed values are accepted
+            vllm_api_url = click.prompt("VLLM API URL", type=str)
+            vllm_connection_key = click.prompt("VLLM Connection Key", type=str)
+            vllm_hf_token = click.prompt("Hugging Face Auth Token", type=str)
+            vllm_role = click.prompt("VLLM Role",
+                                     type=str,
+                                     default="You are NeonLLM.")
+            vllm_context_depth = click.prompt("VLLM Context depth",
+                                              type=int,
+                                              default=4)
+            vllm_max_tokens = click.prompt("Maximum tokens in responses",
+                                           type=int,
+                                           default=512)
+            vllm_num_parallel_processes = click.prompt("Number of parallel processes",
+                                                       type=int,  # NOTE(review): no lower bound; click.IntRange(min=1) would reject 0/negatives
+                                                       default=2)
+            vllm_config = {  # this dict is what gets stored under the "LLM_VLLM" key
+                "api_url": vllm_api_url,
+                "key": vllm_connection_key,
+                "hf_token": vllm_hf_token,
+                "role": vllm_role,
+                "context_depth": vllm_context_depth,
+                "max_tokens": vllm_max_tokens,
+                "num_parallel_processes": vllm_num_parallel_processes
+            }
+            click.echo(pformat(vllm_config))  # NOTE(review): shows key and hf_token in clear text
+            config_confirmed = \
+                click.confirm("Is this configuration correct?")
+
     fastchat_config = dict()
     if click.confirm("Configure FastChat LLM?"):
         config_confirmed = False
@@ -323,6 +357,7 @@ def make_keys_config(write_config: bool,
                  "emails": email_config,
                  "track_my_brands": brands_config},
         "LLM_CHAT_GPT": chatgpt_config,
+        "LLM_VLLM": vllm_config,
         "LLM_FASTCHAT": fastchat_config,
         "LLM_PALM2": palm2_config,
         "LLM_GEMINI": gemini_config,
@@ -572,6 +607,7 @@ def _get_unconfigured_mq_backend_services(config: dict) -> Set[str]:
                          'keys.emails': 'neon-email-proxy',
                          'keys.track_my_brands': 'neon-brands-service',
                          'LLM_CHAT_GPT': 'neon-llm-chatgpt',
+                         'LLM_VLLM': 'neon-llm-vllm',
                          'LLM_FASTCHAT': 'neon-llm-fastchat',
                          'LLM_CLAUDE': 'neon-llm-claude',
                          'LLM_GEMINI': 'neon-llm-gemini',

diff --git a/neon_diana_utils/docker/backend/docker-compose.yml b/neon_diana_utils/docker/backend/docker-compose.yml
@@ -64,6 +64,20 @@ services:
       - XDG_DATA_HOME=/xdg/data
       - XDG_CACHE_HOME=/xdg/cache
       - XDG_STATE_HOME=/xdg/state
+  neon-llm-vllm:
+    container_name: neon-llm-vllm
+    image: ghcr.io/neongeckocom/neon-llm-vllm:${MQ_IMAGE_TAG}  # tag is taken from the MQ_IMAGE_TAG variable
+    depends_on:
+      - neon-rabbitmq  # declared dependency on the RabbitMQ service
+    networks:
+      - diana-backend
+    volumes:
+      - xdg:/xdg:rw  # volume "xdg" mounted read-write at /xdg, the root of the XDG_* paths below
+    environment:
+      - XDG_CONFIG_HOME=/xdg/config
+      - XDG_DATA_HOME=/xdg/data
+      - XDG_CACHE_HOME=/xdg/cache
+      - XDG_STATE_HOME=/xdg/state
   neon-llm-fastchat:
     container_name: neon-llm-fastchat
     image: ghcr.io/neongeckocom/neon-llm-fastchat:${MQ_IMAGE_TAG}

diff --git a/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml b/neon_diana_utils/helm_charts/backend/diana-backend/Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.25
+version: 0.1.27
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
@@ -35,5 +35,5 @@ dependencies:
     version: 0.0.14
     repository: file://../http-services
   - name: diana-mq
-    version: 0.0.17
+    version: 0.0.18
     repository: file://../mq-services
diff --git a/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml b/neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml
@@ -3,7 +3,7 @@ name: diana-mq
 description: Deploy DIANA MQ Services
 
 type: application
-version: 0.0.17
+version: 0.0.18
 appVersion: "1.0.1a26"
 dependencies:
   - name: neon-api-proxy
@@ -30,6 +30,10 @@ dependencies:
     alias: neon-llm-chatgpt
     version: 0.0.7
     repository: file://../../mq/neon-llm-chatgpt
+  - name: neon-llm-vllm
+    alias: neon-llm-vllm
+    version: 0.0.1
+    repository: file://../../mq/neon-llm-vllm
   - name: neon-llm-fastchat
     alias: neon-llm-fastchat
     version: 0.0.6

diff --git a/neon_diana_utils/helm_charts/backend/mq-services/values.yaml b/neon_diana_utils/helm_charts/backend/mq-services/values.yaml
@@ -18,6 +18,9 @@ neon-script-parser:
 neon-llm-chatgpt:
   image:
     tag: *tag
+neon-llm-vllm:
+  image:
+    tag: *tag
 neon-llm-fastchat:
   image:
     tag: *tag
diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml
@@ -0,0 +1,12 @@
+apiVersion: v2
+name: neon-llm-vllm
+description: Deploy an LLM proxy for Neon VLLM Service
+
+type: application
+version: 0.0.1  # keep in sync with the neon-llm-vllm dependency version pinned in mq-services/Chart.yaml
+appVersion: "1.0.1a25"  # NOTE(review): the diana-mq chart declares appVersion "1.0.1a26" - confirm this value is intended
+
+dependencies:
+  - name: base-mq  # presumably provides the base-mq.deployment / base-mq.service templates this chart includes
+    version: 0.0.10
+    repository: file://../../base/base-mq
diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart: .Values.nameOverride if set, else .Chart.Name, truncated to 63 chars. NOTE(review): the "neon-llm-service." prefix is generic rather than this chart's name; Helm template names are global across subcharts.
+*/}}
+{{- define "neon-llm-service.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name; .Values.fullnameOverride, when set, is used instead.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If the release name already contains the chart name, the release name alone is used as the full name.
+*/}}
+{{- define "neon-llm-service.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create "<chart name>-<chart version>" as used by the chart label; "+" is replaced with "_" and the result is truncated to 63 chars.
+*/}}
+{{- define "neon-llm-service.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, the app version (only when .Chart.AppVersion is set) and the managing service.
+*/}}
+{{- define "neon-llm-service.labels" -}}
+helm.sh/chart: {{ include "neon-llm-service.chart" . }}
+{{ include "neon-llm-service.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the chart name (app.kubernetes.io/name) and the release name (app.kubernetes.io/instance).
+*/}}
+{{- define "neon-llm-service.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "neon-llm-service.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Service account name: .Values.serviceAccount.name if set; otherwise the fullname when serviceAccount.create is true, else "default". NOTE(review): this chart's values.yaml sets no serviceAccount key - confirm nothing includes this helper.
+*/}}
+{{- define "neon-llm-service.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "neon-llm-service.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/deployment.yaml
@@ -0,0 +1 @@
+{{- include "base-mq.deployment" .}}
diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/templates/service.yaml
@@ -0,0 +1 @@
+{{- include "base-mq.service" .}}
diff --git a/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml b/neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml
@@ -0,0 +1,11 @@
+serviceName: backend-llm-vllm
+replicaCount: 1
+configSecret: diana-config  # presumably the Secret holding the generated Diana configuration - TODO confirm against base-mq
+image:
+  repository: ghcr.io/neongeckocom/neon-llm-vllm
+  pullPolicy: Always
+  tag: dev  # default only; the diana-mq parent chart sets neon-llm-vllm.image.tag from its shared *tag anchor
+resources:
+  requests:  # NOTE(review): only requests are declared; no limits are set in this file
+    memory: "1Gi"
+    cpu: "1.0"
diff --git a/neon_diana_utils/templates/backend/Chart.yaml b/neon_diana_utils/templates/backend/Chart.yaml
@@ -8,5 +8,5 @@ appVersion: "1.0.1a5"
 
 dependencies:
   - name: backend
-    version: 0.1.25
+    version: 0.1.27
     repository: https://neongeckocom.github.io/neon-diana-utils
diff --git a/neon_diana_utils/templates/llm_personas.yml b/neon_diana_utils/templates/llm_personas.yml
@@ -26,6 +26,13 @@ chat_gpt:
       You are an AI bot that specializes in counseling and mental health support.
       Provide guidance on assessments, therapy sessions, crisis intervention, goal setting, referrals, advocacy, education, documentation, and adherence to ethical standards, fostering positive changes in clients' lives.
       You're attempting to provide a concise response within a 40-word limit.
+vllm:
+  - name: neon
+    description: |
+      You are NeonLLM.
+  - name: patent
+    description: |
+      You are PatentLLM.
 palm2:
   - name: travel_mate
     description: |

diff --git a/neon_diana_utils/templates/mq_user_mapping.yml b/neon_diana_utils/templates/mq_user_mapping.yml
@@ -13,6 +13,8 @@ neon_libretranslate:
   - mq-libre-translate
 neon_llm_chatgpt:
   - neon_llm_chat_gpt
+neon_llm_vllm:
+  - neon_llm_vllm
 neon_llm_fastchat:
   - neon_llm_fastchat
 neon_llm_claude:

diff --git a/neon_diana_utils/templates/rmq_backend_config.yml b/neon_diana_utils/templates/rmq_backend_config.yml
@@ -39,6 +39,12 @@ users:
     tags:
       - backend
       - llm
+  - name: "neon_llm_vllm"
+    password:  # left empty in this template; presumably filled in when the config is generated - TODO confirm
+    tags:
+      - backend
+      - llm
+      - vllm
   - name: "neon_llm_fastchat"
     password:
     tags:
@@ -109,6 +115,11 @@ permissions:
     configure: "chat_gpt_.*"
     write: "chat_gpt_.*|amq\\.default"
     read: "chat_gpt_.*"
+  - user: "neon_llm_vllm"
+    vhost: "/llm"
+    configure: "vllm_.*"  # same prefix-pattern shape as the neighbouring chat_gpt_ and fastchat_ entries
+    write: "vllm_.*|amq\\.default"  # additionally matches amq.default
+    read: "vllm_.*"
   - user: "neon_llm_fastchat"
     vhost: "/llm"
     configure: "fastchat_.*"

diff --git a/tests/test_diana_utils.py b/tests/test_diana_utils.py
@@ -289,6 +289,7 @@ def test_get_unconfigured_backend_services(self):
         all_configured = {'keys': {'api_services': {'configured': True},
                                    'emails': {'configured': True},
                                    'track_my_brands': True},
+                          'LLM_VLLM': {'config': False},
                           'LLM_CHAT_GPT': {'config': False},
                           'LLM_CLAUDE': {'': ''},
                           'LLM_PALM2': 'enabled',
@@ -304,7 +305,8 @@ def test_get_unconfigured_backend_services(self):
         self.assertEqual(disabled, {'neon-api-proxy', 'neon-brands-service',
                                     'neon-email-proxy', 'neon-llm-chatgpt',
                                     'neon-llm-fastchat', 'neon-llm-claude',
-                                    'neon-llm-palm', 'neon-llm-gemini'})
+                                    'neon-llm-palm', 'neon-llm-gemini',
+                                    'neon-llm-vllm'})
 
     def test_get_optional_http_backend(self):
         from neon_diana_utils.configuration import _get_optional_http_backend

diff --git a/tests/test_rabbitmq.json b/tests/test_rabbitmq.json
@@ -131,6 +131,13 @@
       "write": ".*",
       "read": "chat_gpt_input"
     },
+    {
+      "user": "neon_llm_vllm",
+      "vhost": "/llm",
+      "configure": ".*",
+      "write": ".*",
+      "read": "vllm_input"
+    },
     {
       "user": "neon_llm_fastchat",
       "vhost": "/llm",