Merge branch 'master' into DOCS-1075

wandb · Jan 7, 2025 · 5929cf9 · 5929cf9
2 parents bda54e4 + fb3e3a7
commit 5929cf9
Show file tree

Hide file tree

Showing 352 changed files with 18,688 additions and 5,107 deletions.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -81,11 +81,16 @@ jobs:
         env:
           CI: 1
           WANDB_ENABLE_TEST_CONTAINER: true
+          LOGGING_ENABLED: true
         ports:
           - '8080:8080'
           - '8083:8083'
           - '9015:9015'
-        options: --health-cmd "curl --fail http://localhost:8080/healthz || exit 1" --health-interval=5s --health-timeout=3s
+        options: >-
+          --health-cmd "wget -q -O /dev/null http://localhost:8080/healthz || exit 1"
+          --health-interval=5s
+          --health-timeout=3s
+          --health-start-period=10s
     outputs:
       tests_should_run: ${{ steps.test_check.outputs.tests_should_run }}
     steps:
@@ -254,11 +259,16 @@ jobs:
         env:
           CI: 1
           WANDB_ENABLE_TEST_CONTAINER: true
+          LOGGING_ENABLED: true
         ports:
           - '8080:8080'
           - '8083:8083'
           - '9015:9015'
-        options: --health-cmd "curl --fail http://localhost:8080/healthz || exit 1" --health-interval=5s --health-timeout=3s
+        options: >-
+          --health-cmd "wget -q -O /dev/null http://localhost:8080/healthz || exit 1"
+          --health-interval=5s
+          --health-timeout=3s
+          --health-start-period=10s
       weave_clickhouse:
         image: clickhouse/clickhouse-server
         ports:
@@ -267,6 +277,8 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v3
+      - name: Enable debug logging
+        run: echo "ACTIONS_STEP_DEBUG=true" >> $GITHUB_ENV
       - name: Set up Python ${{ matrix.python-version-major }}.${{ matrix.python-version-minor }}
         uses: actions/setup-python@v5
         with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
     hooks:
       - id: mypy
         additional_dependencies:
-          [types-pkg-resources==0.1.3, types-all, wandb>=0.15.5]
+          [types-pkg-resources==0.1.3, types-all, wandb>=0.15.5, wandb<0.19.0]
         # Note: You have to update pyproject.toml[tool.mypy] too!
         args: ["--config-file=pyproject.toml"]
         exclude: (.*pyi$)|(weave_query)|(tests)|(examples)

diff --git a/dev_docs/BaseObjectClasses.md → dev_docs/BuiltinObjectClasses.md b/dev_docs/BaseObjectClasses.md → dev_docs/BuiltinObjectClasses.md
@@ -1,4 +1,4 @@
-# BaseObjectClasses
+# BuiltinObjectClasses
 
 ## Refresher on Objects and object storage
 
@@ -79,11 +79,11 @@ While many Weave Objects are free-form and user-defined, there is often a need f
 
 Here's how to define and use a validated base object:
 
-1. **Define your schema** (in `weave/trace_server/interface/base_object_classes/your_schema.py`):
+1. **Define your schema** (in `weave/trace_server/interface/builtin_object_classes/your_schema.py`):
 
 ```python
 from pydantic import BaseModel
-from weave.trace_server.interface.base_object_classes import base_object_def
+from weave.trace_server.interface.builtin_object_classes import base_object_def
 
 class NestedConfig(BaseModel):
     setting_a: int
@@ -116,7 +116,7 @@ curl -X POST 'https://trace.wandb.ai/obj/create' \
       "project_id": "user/project",
       "object_id": "my_config",
       "val": {...},
-      "set_base_object_class": "MyConfig"
+      "object_class": "MyConfig"
     }
   }'
 
@@ -154,38 +154,38 @@ Run `make synchronize-base-object-schemas` to ensure the frontend TypeScript typ
 
 ### Architecture Flow
 
-1. Define your schema in a python file in the `weave/trace_server/interface/base_object_classes/test_only_example.py` directory. See `weave/trace_server/interface/base_object_classes/test_only_example.py` as an example.
-2. Make sure to register your schemas in `weave/trace_server/interface/base_object_classes/base_object_registry.py` by calling `register_base_object`.
+1. Define your schema in a Python file in the `weave/trace_server/interface/builtin_object_classes/` directory. See `weave/trace_server/interface/builtin_object_classes/test_only_example.py` as an example.
+2. Make sure to register your schemas in `weave/trace_server/interface/builtin_object_classes/builtin_object_registry.py` by calling `register_base_object`.
 3. Run `make synchronize-base-object-schemas` to generate the frontend types.
-    * The first step (`make generate_base_object_schemas`) will run `weave/scripts/generate_base_object_schemas.py` to generate a JSON schema in `weave/trace_server/interface/base_object_classes/generated/generated_base_object_class_schemas.json`.
-    * The second step (yarn `generate-schemas`) will read this file and use it to generate the frontend types located in `weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts`.
+    * The first step (`make generate_base_object_schemas`) will run `weave/scripts/generate_base_object_schemas.py` to generate a JSON schema in `weave/trace_server/interface/builtin_object_classes/generated/generated_builtin_object_class_schemas.json`.
+    * The second step (yarn `generate-schemas`) will read this file and use it to generate the frontend types located in `weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBuiltinObjectClasses.zod.ts`.
 4. Now, each use case uses different parts:
     1. `Python Writing`. Users can directly import these classes and use them as normal Pydantic models, which get published with `weave.publish`. The Python client correctly builds the requisite payload.
     2. `Python Reading`. Users can `weave.ref().get()` and the weave python SDK will return the instance with the correct type. Note: we do some special handling such that the returned object is not a WeaveObject, but literally the exact pydantic class.
-    3. `HTTP Writing`. In cases where the client/user does not want to add the special type information, users can publish base objects by setting the `set_base_object_class` setting on `POST obj/create` to the name of the class. The weave server will validate the object against the schema, update the metadata fields, and store the object.
+    3. `HTTP Writing`. In cases where the client/user does not want to add the special type information, users can publish builtin objects (set of weave.Objects provided by Weave) by setting the `builtin_object_class` setting on `POST obj/create` to the name of the class. The weave server will validate the object against the schema, update the metadata fields, and store the object.
     4. `HTTP Reading`. When querying for objects, the server will return the object with the correct type if the `base_object_class` metadata field is set.
-    5. `Frontend`. The frontend will read the zod schema from `weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts` and use that to provide compile time type safety when using `useBaseObjectInstances` and runtime type safety when using `useCreateBaseObjectInstance`.
+    5. `Frontend`. The frontend will read the zod schema from `weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBuiltinObjectClasses.zod.ts` and use that to provide compile time type safety when using `useBaseObjectInstances` and runtime type safety when using `useCreateBaseObjectInstance`.
 * Note: it is critical that all techniques produce the same digest for the same data - which is tested in the tests. This way versions are not thrashed by different clients/users.
 
 ```mermaid
 graph TD
     subgraph Schema Definition
         F["weave/trace_server/interface/<br>builtin_object_classes/your_schema.py"] --> |defines| P[Pydantic BaseObject]
-        P --> |register_base_object| R["base_object_registry.py"]
+        P --> |register_base_object| R["builtin_object_registry.py"]
     end
 
     subgraph Schema Generation
         M["make synchronize-base-object-schemas"] --> G["make generate_base_object_schemas"]
         G --> |runs| S["weave/scripts/<br>generate_base_object_schemas.py"]
         R --> |import registered classes| S
-        S --> |generates| J["generated_base_object_class_schemas.json"]
-        M --> |yarn generate-schemas| Z["generatedBaseObjectClasses.zod.ts"]
+        S --> |generates| J["generated_builtin_object_class_schemas.json"]
+        M --> |yarn generate-schemas| Z["generatedBuiltinObjectClasses.zod.ts"]
         J --> Z
     end
 
     subgraph "Trace Server"
         subgraph "HTTP API"
-            R --> |validates using| HW["POST obj/create<br>set_base_object_class"]
+            R --> |validates using| HW["POST obj/create<br>object_class"]
             HW --> DB[(Weave Object Store)]
             HR["POST objs/query<br>base_object_classes"] --> |Filters base_object_class| DB
         end
@@ -203,7 +203,7 @@ graph TD
         Z --> |import| UBI["useBaseObjectInstances"]
         Z --> |import| UCI["useCreateBaseObjectInstance"]
         UBI --> |Filters base_object_class| HR
-        UCI --> |set_base_object_class| HW
+        UCI --> |object_class| HW
         UI[React UI] --> UBI
         UI --> UCI
     end

diff --git a/dev_docs/RELEASE.md b/dev_docs/RELEASE.md
@@ -6,7 +6,8 @@ This document outlines how to publish a new Weave release to our public [PyPI pa
 
 2. You should also run through this [sample notebook](https://colab.research.google.com/drive/1DmkLzhFCFC0OoN-ggBDoG1nejGw2jQZy#scrollTo=29hJrcJQA7jZ); remember to install from master. You can also just run the [quickstart](http://wandb.me/weave_colab).
 
-3. To prepare a PATCH release, go to GitHub Actions and run the `bump-python-sdk-version` workflow on master. This will:
+3. To prepare a PATCH release, go to GitHub Actions and run the [bump-python-sdk-version](https://github.com/wandb/weave/actions/workflows/bump_version.yaml) workflow on master. This will:
+
    - Create a new patch version by dropping the pre-release (e.g., `x.y.z-dev0` -> `x.y.z`) and tag this commit with `x.y.z`
    - Create a new dev version by incrementing the dev version (e.g., `x.y.z` -> `x.y.(z+1)-dev0`) and commit this to master
    - Both of these commits will be pushed to master
@@ -16,6 +17,6 @@ This document outlines how to publish a new Weave release to our public [PyPI pa
 
 5. Verify the new version of Weave exists in [PyPI](https://pypi.org/project/weave/) once it is complete.
 
-6. Go to GitHub, click the release tag, and click `Draft a New Release`. Select the new tag, and click generate release notes. Publish the release.
+6. Go to the [GitHub new release page](https://github.com/wandb/weave/releases/new). Select the new tag, and click "Generate release notes". Publish the release.
 
 7. Finally, announce that the merge freeze is over.
diff --git a/docs/docs/guides/evaluation/scorers.md b/docs/docs/guides/evaluation/scorers.md
@@ -224,9 +224,9 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics.
     ```
 
     ### Mapping Column Names with `columnMapping`
-    :::warning
+    :::important
 
-    In TypeScript, this feature is currently on the `Evaluation` object, not individual scorers!
+    In TypeScript, this feature is currently on the `Evaluation` object, not individual scorers.
 
     :::
 
@@ -455,7 +455,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics.
     from weave.scorers import OpenAIModerationScorer
     from openai import OpenAI
 
-    oai_client = OpenAI(api_key=...) # initialize your LLM client here
+    oai_client = OpenAI() # initialize your LLM client here
 
     scorer = OpenAIModerationScorer(
         client=oai_client,

diff --git a/docs/docs/guides/integrations/azure.md b/docs/docs/guides/integrations/azure.md
@@ -0,0 +1,26 @@
+# Microsoft Azure
+
+Weights & Biases integrates with Microsoft Azure OpenAI services, helping teams to manage, debug, and optimize their Azure AI workflows at scale. This guide introduces the W&B integration, what it means for Weave users, its key features, and how to get started.
+
+## Key features
+
+- **LLM evaluations**: Evaluate and monitor LLM-powered applications using Weave, optimized for Azure infrastructure.  
+- **Seamless integration**: Deploy W&B Models on a dedicated Azure tenant with built-in integrations for Azure AI Studio, Azure ML, Azure OpenAI Service, and other Azure AI services.  
+- **Enhanced performance**: Use Azure’s infrastructure to train and deploy models faster, with auto-scaling clusters and optimized resources.  
+- **Scalable experiment tracking**: Automatically log hyperparameters, metrics, and artifacts for Azure AI Studio and Azure ML runs.  
+- **LLM fine-tuning**: Fine-tune models with W&B Models.
+- **Central repository for models and datasets**: Manage and version models and datasets with W&B Registry and Azure AI Studio.  
+- **Collaborative workspaces**: Support teamwork with shared workspaces, experiment commenting, and Microsoft Teams integration.  
+- **Governance framework**: Ensure security with fine-grained access controls, audit trails, and Microsoft Entra ID integration.  
+
+## Getting started
+
+To use W&B with Azure, add the W&B integration via the [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/weightsandbiasesinc1641502883483.weights_biases_for_azure?tab=Overview).
+
+For a detailed guide describing how to integrate Azure OpenAI fine-tuning with W&B, see [Integrating Weights & Biases with Azure AI Services](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/weights-and-biases-integration).
+
+## Learn more
+
+- [Weights & Biases + Microsoft Azure Overview](https://wandb.ai/site/partners/azure)
+- [How W&B and Microsoft Azure Are Empowering Enterprises](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/how-weights--biases-and-microsoft-azure-are-empowering-enterprises-to-fine-tune-/4303716)
+- [Microsoft Azure OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/)
diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_model.png b/docs/docs/guides/integrations/imgs/chatnvidia_model.png
diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_trace.png b/docs/docs/guides/integrations/imgs/chatnvidia_trace.png
diff --git a/docs/docs/guides/integrations/imgs/nvidia_pokedex.png b/docs/docs/guides/integrations/imgs/nvidia_pokedex.png
diff --git a/docs/docs/guides/integrations/index.md b/docs/docs/guides/integrations/index.md
@@ -16,11 +16,14 @@ LLM providers are the vendors that offer access to large language models for gen
 - **[Cerebras](/guides/integrations/cerebras)**
 - **[Cohere](/guides/integrations/cohere)**
 - **[MistralAI](/guides/integrations/mistral)**
+- **[Microsoft Azure](/guides/integrations/azure)**
 - **[Google Gemini](/guides/integrations/google-gemini)**
 - **[Together AI](/guides/integrations/together_ai)**
 - **[Groq](/guides/integrations/groq)**
 - **[Open Router](/guides/integrations/openrouter)**
 - **[LiteLLM](/guides/integrations/litellm)**
+- **[NVIDIA NIM](/guides/integrations/nvidia_nim)**
+
 
 
 **[Local Models](/guides/integrations/local_models)**: For when you're running models on your own infrastructure.

diff --git a/docs/docs/guides/integrations/local_models.md b/docs/docs/guides/integrations/local_models.md
@@ -14,7 +14,6 @@ First and most important, is the `base_url` change during the `openai.OpenAI()`
 
 ```python
 client = openai.OpenAI(
-    api_key='fake',
     base_url="http://localhost:1234",
 )
 ```

diff --git a/docs/docs/guides/integrations/notdiamond.md b/docs/docs/guides/integrations/notdiamond.md
@@ -68,7 +68,6 @@ preference_id = train_router(
     response_column="actual",
     language="en",
     maximize=True,
-    api_key=api_key,
 )
 ```