Skip to content

Commit

Permalink
Added configs for Neon VLLM service
Browse files Browse the repository at this point in the history
  • Loading branch information
kirgrim committed Oct 28, 2024
1 parent e21d057 commit 46e59c4
Show file tree
Hide file tree
Showing 13 changed files with 189 additions and 0 deletions.
38 changes: 38 additions & 0 deletions neon_diana_utils/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def make_llm_bot_config():
configuration['llm_bots']['gemini'] = persona_config['gemini']
if click.confirm("Configure Claude Personas?"):
configuration['llm_bots']['claude'] = persona_config['claude']
if click.confirm("Configure VLLM Personas?"):
configuration['llm_bots']['vllm'] = persona_config['vllm']
return configuration


Expand Down Expand Up @@ -193,6 +195,40 @@ def make_keys_config(write_config: bool,
config_confirmed = \
click.confirm("Is this configuration correct?")

# Interactive collection of the Neon VLLM service configuration.
# Empty dict when the user declines, so downstream config writing is safe.
vllm_config = dict()
if click.confirm("Configure VLLM?"):
    config_confirmed = False
    # Re-prompt the whole section until the user approves the echoed result
    while not config_confirmed:
        vllm_api_url = click.prompt("VLLM API URL", type=str)
        vllm_connection_key = click.prompt("VLLM Connection Key", type=str)
        vllm_hf_token = click.prompt("Hugging Face Auth Token", type=str)
        # FIX: added a space after "NeonLLM." — the original implicit string
        # concatenation produced "You are NeonLLM.You are trying..." with the
        # two sentences fused together in the default persona prompt.
        vllm_role = click.prompt("VLLM Role",
                                 type=str,
                                 default="You are NeonLLM. "
                                         "You are trying to give a short "
                                         "answer in less than 40 words.")
        vllm_context_depth = click.prompt("VLLM Context depth",
                                          type=int,
                                          default=4)
        vllm_max_tokens = click.prompt("Maximum tokens in responses",
                                       type=int,
                                       default=100)
        vllm_num_parallel_processes = click.prompt(
            "Number of parallel processes", type=int, default=2)
        # Assemble the section consumed as `LLM_VLLM` in the keys config
        vllm_config = {
            "api_url": vllm_api_url,
            "key": vllm_connection_key,
            "hf_token": vllm_hf_token,
            "role": vllm_role,
            "context_depth": vllm_context_depth,
            "max_tokens": vllm_max_tokens,
            "num_parallel_processes": vllm_num_parallel_processes
        }
        # Echo the collected values for visual verification before accepting
        click.echo(pformat(vllm_config))
        config_confirmed = \
            click.confirm("Is this configuration correct?")

fastchat_config = dict()
if click.confirm("Configure FastChat LLM?"):
config_confirmed = False
Expand Down Expand Up @@ -300,6 +336,7 @@ def make_keys_config(write_config: bool,
"emails": email_config,
"track_my_brands": brands_config},
"LLM_CHAT_GPT": chatgpt_config,
"LLM_VLLM": vllm_config,
"LLM_FASTCHAT": fastchat_config,
"LLM_PALM2": palm2_config,
"LLM_GEMINI": gemini_config,
Expand Down Expand Up @@ -544,6 +581,7 @@ def _get_unconfigured_mq_backend_services(config: dict) -> Set[str]:
'keys.emails': 'neon-email-proxy',
'keys.track_my_brands': 'neon-brands-service',
'LLM_CHAT_GPT': 'neon-llm-chatgpt',
'LLM_VLLM': 'neon-llm-vllm',
'LLM_FASTCHAT': 'neon-llm-fastchat',
'LLM_CLAUDE': 'neon-llm-claude',
'LLM_GEMINI': 'neon-llm-gemini',
Expand Down
14 changes: 14 additions & 0 deletions neon_diana_utils/docker/backend/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,20 @@ services:
- XDG_DATA_HOME=/xdg/data
- XDG_CACHE_HOME=/xdg/cache
- XDG_STATE_HOME=/xdg/state
neon-llm-vllm:
container_name: neon-llm-vllm
image: ghcr.io/neongeckocom/neon-llm-vllm:${MQ_IMAGE_TAG}
depends_on:
- neon-rabbitmq
networks:
- diana-backend
volumes:
- xdg:/xdg:rw
environment:
- XDG_CONFIG_HOME=/xdg/config
- XDG_DATA_HOME=/xdg/data
- XDG_CACHE_HOME=/xdg/cache
- XDG_STATE_HOME=/xdg/state
neon-llm-fastchat:
container_name: neon-llm-fastchat
image: ghcr.io/neongeckocom/neon-llm-fastchat:${MQ_IMAGE_TAG}
Expand Down
4 changes: 4 additions & 0 deletions neon_diana_utils/helm_charts/backend/mq-services/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ dependencies:
alias: neon-llm-chatgpt
version: 0.0.7
repository: file://../../mq/neon-llm-chatgpt
- name: neon-llm-vllm
alias: neon-llm-vllm
version: 0.0.7
repository: file://../../mq/neon-llm-vllm
- name: neon-llm-fastchat
alias: neon-llm-fastchat
version: 0.0.6
Expand Down
3 changes: 3 additions & 0 deletions neon_diana_utils/helm_charts/backend/mq-services/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ neon-script-parser:
neon-llm-chatgpt:
image:
tag: *tag
neon-llm-vllm:
image:
tag: *tag
neon-llm-fastchat:
image:
tag: *tag
23 changes: 23 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
12 changes: 12 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: v2
name: neon-llm-vllm
description: Deploy an LLM proxy for Neon VLLM Service

type: application
version: 0.0.7
appVersion: "1.0.1a25"

dependencies:
- name: base-mq
version: 0.0.10
repository: file://../../base/base-mq
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when set, otherwise the chart name; truncated to
63 characters with any trailing "-" removed so it is a valid DNS label.
*/}}
{{- define "neon-llm-service.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
.Values.fullnameOverride, when set, takes precedence over both.
*/}}
{{- define "neon-llm-service.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
"+" is replaced with "_" because label values may not contain "+".
*/}}
{{- define "neon-llm-service.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels applied to every rendered resource; includes the selector
labels plus chart/version/managed-by metadata labels.
*/}}
{{- define "neon-llm-service.labels" -}}
helm.sh/chart: {{ include "neon-llm-service.chart" . }}
{{ include "neon-llm-service.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels — the stable subset used by Deployment selectors and
Service selectors; must not change between upgrades.
*/}}
{{- define "neon-llm-service.selectorLabels" -}}
app.kubernetes.io/name: {{ include "neon-llm-service.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use.
Defaults to the fullname when a service account is being created, otherwise
to the namespace "default" account, unless explicitly overridden.
*/}}
{{- define "neon-llm-service.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "neon-llm-service.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "base-mq.deployment" .}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "base-mq.service" .}}
11 changes: 11 additions & 0 deletions neon_diana_utils/helm_charts/mq/neon-llm-vllm/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Default values for the neon-llm-vllm chart (consumed by the base-mq
# library chart this chart depends on).
serviceName: backend-llm-vllm  # Kubernetes Service / workload name
replicaCount: 1
configSecret: diana-config     # secret holding the Diana backend config
image:
  repository: ghcr.io/neongeckocom/neon-llm-vllm
  pullPolicy: Always
  tag: dev                     # overridden to the release tag by mq-services values
resources:
  requests:
    memory: "1Gi"
    cpu: "1.0"
7 changes: 7 additions & 0 deletions neon_diana_utils/templates/llm_personas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ chat_gpt:
You are an AI bot that specializes in counseling and mental health support.
Provide guidance on assessments, therapy sessions, crisis intervention, goal setting, referrals, advocacy, education, documentation, and adherence to ethical standards, fostering positive changes in clients' lives.
You're attempting to provide a concise response within a 40-word limit.
vllm:
- name: neon
description: |
You are NeonLLM.
- name: patent
description: |
You are PatentLLM.
palm2:
- name: travel_mate
description: |
Expand Down
2 changes: 2 additions & 0 deletions neon_diana_utils/templates/mq_user_mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ neon_libretranslate:
- mq-libre-translate
neon_llm_chatgpt:
- neon_llm_chat_gpt
neon_llm_vllm:
- neon_llm_vllm
neon_llm_fastchat:
- neon_llm_fastchat
neon_llm_claude:
Expand Down
11 changes: 11 additions & 0 deletions neon_diana_utils/templates/rmq_backend_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ users:
tags:
- backend
- llm
- name: "neon_llm_vllm"
password:
tags:
- backend
- llm
- vllm
- name: "neon_llm_fastchat"
password:
tags:
Expand Down Expand Up @@ -109,6 +115,11 @@ permissions:
configure: "chat_gpt_.*"
write: "chat_gpt_.*|amq\\.default"
read: "chat_gpt_.*"
- user: "neon_llm_vllm"
vhost: "/llm"
configure: "vllm_.*"
write: "vllm_.*|amq\\.default"
read: "vllm_.*"
- user: "neon_llm_fastchat"
vhost: "/llm"
configure: "fastchat_.*"
Expand Down

0 comments on commit 46e59c4

Please sign in to comment.