
Pull out client tests (#46)
* merge

* latest litellm

* move tests to client

* github workflow

* conftests, don't start server in client tests

* tests/astra-assistants/conftest.py

* openai-sdk conftest fix http client

* astra-assistants tests fix import

* deps

* tests

* pplx mix skip test

* command-r

* skip perpmix test

* don't use RunStep from SDK

* fix step id and 404 check
phact authored Jun 17, 2024
1 parent c7bc292 commit 1a352ee
Showing 70 changed files with 1,880 additions and 2,409 deletions.
1,039 changes: 703 additions & 336 deletions .github/workflows/run-tests.yml

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -28,15 +28,15 @@ The client library itself also has its own test suite.

The assistant-api-server repo is a Python server app that depends heavily on FastAPI, Pydantic, and the DataStax Python driver. It relies on LiteLLM for third-party LLM support, and we've been very happy with the maintainers' responsiveness on GitHub as well as their ability to quickly add new models as the AI landscape evolves.

The app is mostly stateless (with the exception of a db connection cache), and all authentication tokens and LLM provider configuration are passed as HTTP headers. The streaming-assistants python library makes it easy for users to just store these configurations as environment variables and it takes care of the rest. We serve the app in production using uvicorn and scale it in Kubernetes using HPA.
The app is mostly stateless (with the exception of a db connection cache), and all authentication tokens and LLM provider configuration are passed as HTTP headers. The astra-assistants python library makes it easy for users to just store these configurations as environment variables and it takes care of the rest. We serve the app in production using uvicorn and scale it in Kubernetes using HPA.
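The env-var-to-header flow described above can be sketched roughly as follows. The environment variable names and HTTP header names here are illustrative assumptions, not the client library's actual wiring:

```python
import os


def auth_headers_from_env():
    """Collect provider credentials from environment variables into
    HTTP headers, so the server itself can stay stateless.

    The env var names and header names below are hypothetical examples.
    """
    mapping = {
        "ASTRA_DB_APPLICATION_TOKEN": "astra-api-token",
        "OPENAI_API_KEY": "api-key",
        "PERPLEXITYAI_API_KEY": "perplexity-api-key",
    }
    headers = {}
    for env_var, header in mapping.items():
        value = os.getenv(env_var)
        if value:
            headers[header] = value
    return headers
```

A patched client would attach these headers to every request, which is why users only need to set environment variables.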

The app consists of both generated and hand-written code. The generated code is based on OpenAI's OpenAPI spec and produced with openapi-generator-cli from openapi-generator.tech; it mostly lives in the `openapi_server` directory. Leveraging the OpenAPI spec was one of the first design decisions we made, and it was a no-brainer: OpenAI's spec is of very high quality (they use it to generate their own SDKs), and using it ensures that the types for all the endpoints are built correctly and enforced by Pydantic.

We keep track of which version of the OpenAI OpenAPI spec we're working with in [`OPEN_API_SPEC_HASH`](/OPEN_API_SPEC_HASH).
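A minimal sketch of how such a pinned hash could be checked (only the `OPEN_API_SPEC_HASH` file name comes from the repo; the hash algorithm and check logic are assumptions):

```python
import hashlib


def spec_changed(spec_bytes, pinned_hash_file="OPEN_API_SPEC_HASH"):
    """Return True when the upstream spec no longer matches the pinned
    hash, signalling that regenerated code may be out of date.

    Assumes the pinned file holds a single hex digest; algorithm (sha256)
    is an illustrative choice, not necessarily what the repo uses.
    """
    current = hashlib.sha256(spec_bytes).hexdigest()
    with open(pinned_hash_file) as f:
        pinned = f.read().strip()
    return current != pinned
```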

The hand-written code takes the method stubs from `openapi_server/apis` and implements them using the types from `openapi_server/models` and `openapi_server_v1/models` inside `impl/routes` and `impl/routes_v2`. The third-party LLM support is abstracted in `impl/services/inference_utils.py`, and the database interactions occur in `impl/astra_vector.py`. We collect throughput, duration, and payload size metrics and export them via a Prometheus exporter, exposed at a `/metrics` endpoint. The exporter is configured to use Prometheus's multi-process collector to support our multi-process uvicorn production deployment.
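As a rough illustration of the per-request metrics mentioned above (duration and payload size), not the repo's actual Prometheus exporter code:

```python
import time
from collections import defaultdict

# In-memory stand-in for a real Prometheus registry; names are illustrative.
METRICS = defaultdict(list)


def record_request(path, payload_size, handler):
    """Time a request handler and record duration and payload size,
    the kind of signals a /metrics endpoint would then expose."""
    start = time.perf_counter()
    result = handler()
    METRICS["request_duration_seconds"].append((path, time.perf_counter() - start))
    METRICS["request_payload_bytes"].append((path, payload_size))
    return result
```

In the real deployment these would be Prometheus counters/histograms written through the multi-process collector so that every uvicorn worker contributes to one scrape.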

Finally, in the `tests` directory we have implemented tests and CI using both an HTTP client directly (originally generated by openapi-generator.tech and tweaked manually) and custom tests that use both the openai SDK and our streaming-assistants library directly.
Finally, in the `tests` directory we have implemented tests and CI using both an HTTP client directly (originally generated by openapi-generator.tech and tweaked manually) and custom tests that use both the openai SDK and our astra-assistants library directly.

### v2 implementation
In `impl/main.py` we disambiguate between v1 and v2 OpenAI headers and route accordingly.
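That disambiguation might look something like the sketch below; the `OpenAI-Beta: assistants=v2` header convention is an assumption used for illustration, not code lifted from `impl/main.py`:

```python
def pick_api_version(headers):
    """Choose the v1 or v2 route implementation based on the OpenAI
    beta header (header semantics assumed for illustration)."""
    beta = headers.get("OpenAI-Beta", "")
    return "v2" if "assistants=v2" in beta else "v1"
```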
2 changes: 1 addition & 1 deletion README.md
@@ -95,7 +95,7 @@ model="gpt-4-1106-preview"
#model="perplexity/mixtral-8x7b-instruct"
#model="perplexity/llama-3-sonar-large-32k-online"
#model="anthropic.claude-v2"
#model="gemini/gemini-pro"
#model="gemini/gemini-1.5-pro-latest"
#model = "meta.llama2-13b-chat-v1"
assistant = client.beta.assistants.create(
36 changes: 18 additions & 18 deletions client/.github/workflows/run-tests.yml
@@ -5,9 +5,9 @@ on:

jobs:

run-streaming-assistants-tests-chat:
run-astra-assistants-tests-chat:
runs-on: ubuntu-latest
name: run streaming-assistants chat tests
name: run astra-assistants chat tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -41,9 +41,9 @@ jobs:
run: |
poetry run pytest -s --disable-warnings tests/test_chat_completion.py
run-streaming-assistants-tests-embedding:
run-astra-assistants-tests-embedding:
runs-on: ubuntu-latest
name: run streaming-assistants embedding tests
name: run astra-assistants embedding tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -76,9 +76,9 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_embedding.py
run-streaming-assistants-tests-file-embedding:
run-astra-assistants-tests-file-embedding:
runs-on: ubuntu-latest
name: run streaming-assistants file embedding tests
name: run astra-assistants file embedding tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -111,9 +111,9 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_file_embedding.py
run-streaming-assistants-tests-function-calling:
run-astra-assistants-tests-function-calling:
runs-on: ubuntu-latest
name: run streaming-assistants function calling tests
name: run astra-assistants function calling tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -146,9 +146,9 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_function_calling.py
run-streaming-assistants-tests-run:
run-astra-assistants-tests-run:
runs-on: ubuntu-latest
name: run streaming-assistants run tests
name: run astra-assistants run tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -181,9 +181,9 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_run.py
run-streaming-assistants-tests-run-retrieval:
run-astra-assistants-tests-run-retrieval:
runs-on: ubuntu-latest
name: run streaming-assistants run retrieval tests
name: run astra-assistants run retrieval tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -216,9 +216,9 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_run_retreival.py
run-streaming-assistants-tests-create-and-stream-run:
run-astra-assistants-tests-create-and-stream-run:
runs-on: ubuntu-latest
name: run streaming-assistants create and stream run tests
name: run astra-assistants create and stream run tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -251,9 +251,9 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_create_and_stream_run.py
run-streaming-assistants-tests-streaming-function-calling:
run-astra-assistants-tests-streaming-function-calling:
runs-on: ubuntu-latest
name: run streaming-assistants streaming function calling tests
name: run astra-assistants streaming function calling tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -286,9 +286,9 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_streaming_function_calling.py
run-streaming-assistants-tests-streaming-calling-run:
run-astra-assistants-tests-streaming-calling-run:
runs-on: ubuntu-latest
name: run streaming-assistants streaming run tests
name: run astra-assistants streaming run tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
4 changes: 2 additions & 2 deletions client/examples/chat_completion.py
@@ -1,5 +1,5 @@
from openai import OpenAI
from streaming_assistants import patch
from astra_assistants import patch
from dotenv import load_dotenv

load_dotenv('./.env')
@@ -51,5 +51,5 @@ def print_chat_completion(model):
model="anthropic.claude-v2"
print_chat_completion(model)

model="gemini/gemini-pro"
model="gemini/gemini-1.5-pro-latest"
print_chat_completion(model)
2 changes: 1 addition & 1 deletion client/examples/embedding.py
@@ -1,5 +1,5 @@
from openai import OpenAI
from streaming_assistants import patch
from astra_assistants import patch
from dotenv import load_dotenv

load_dotenv('./.env')
2 changes: 1 addition & 1 deletion client/examples/file_embedding.py
@@ -1,7 +1,7 @@
from dotenv import load_dotenv
from openai import OpenAI

from streaming_assistants import patch
from astra_assistants import patch

load_dotenv("./.env")

4 changes: 2 additions & 2 deletions client/examples/run.py
@@ -1,5 +1,5 @@
from openai import OpenAI
from streaming_assistants import patch
from astra_assistants import patch

from dotenv import load_dotenv
import time
@@ -90,6 +90,6 @@ def test_run_with_assistant(assistant, client):
gemini_assistant = client.beta.assistants.create(
name="Gemini Animal Tutor",
instructions=instructions,
model="gemini/gemini-pro",
model="gemini/gemini-1.5-pro-latest",
)
test_run_with_assistant(gemini_assistant, client)
4 changes: 2 additions & 2 deletions client/examples/run_retreival.py
@@ -4,7 +4,7 @@
from openai.lib.streaming import AssistantEventHandler
from typing_extensions import override

from streaming_assistants import patch
from astra_assistants import patch

class EventHandler(AssistantEventHandler):
@override
@@ -108,7 +108,7 @@ def run_with_assistant(assistant, client):
)
run_with_assistant(claude_assistant, client)

model = "gemini/gemini-pro"
model = "gemini/gemini-1.5-pro-latest"
name = f"{model} Math Tutor"

gemini_assistant = client.beta.assistants.create(