From 5594403a2ae09bd0aa46e749b9a8ee8d56cd9932 Mon Sep 17 00:00:00 2001
From: Stefano Lottini <stefano.lottini@datastax.com>
Date: Tue, 1 Oct 2024 23:45:48 +0200
Subject: [PATCH 1/3] added [a]delete_by_metadata_filter + tests

---
 .../astradb/langchain_astradb/vectorstores.py | 56 +++++++++++++++++++
 .../integration_tests/test_vectorstore.py     | 54 ++++++++++++++++++
 .../test_vectorstore_autodetect.py            | 28 ++++++++++
 3 files changed, 138 insertions(+)

diff --git a/libs/astradb/langchain_astradb/vectorstores.py b/libs/astradb/langchain_astradb/vectorstores.py
index 8b1576a..c63452b 100644
--- a/libs/astradb/langchain_astradb/vectorstores.py
+++ b/libs/astradb/langchain_astradb/vectorstores.py
@@ -827,6 +827,62 @@ async def adelete(
         )
         return True
 
+    def delete_by_metadata_filter(
+        self,
+        filter: dict[str, Any],  # noqa: A002
+    ) -> int | None:
+        """Delete all documents matching a certain metadata filtering condition.
+
+        This operation does not use the vector embeddings in any way, it simply
+        removes all documents whose metadata match the provided condition.
+        Use with caution: passing an empty filter dictionary results in
+        completely emptying the vector store.
+
+        Args:
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            An number expressing the amount of deleted documents.
+            This will be None if a `{}` metadata filter condition is passed,
+            implying emptying the store entirely.
+        """
+        self.astra_env.ensure_db_setup()
+        metadata_parameter = self.filter_to_query(filter)
+        del_result = self.astra_env.collection.delete_many(
+            filter=metadata_parameter,
+        )
+        if del_result.deleted_count is not None and del_result.deleted_count >= 0:
+            return del_result.deleted_count
+        return None
+
+    async def adelete_by_metadata_filter(
+        self,
+        filter: dict[str, Any],  # noqa: A002
+    ) -> int | None:
+        """Delete all documents matching a certain metadata filtering condition.
+
+        This operation does not use the vector embeddings in any way, it simply
+        removes all documents whose metadata match the provided condition.
+        Use with caution: passing an empty filter dictionary results in
+        completely emptying the vector store.
+
+        Args:
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            An number expressing the amount of deleted documents.
+            This will be None if a `{}` metadata filter condition is passed,
+            implying emptying the store entirely.
+        """
+        await self.astra_env.aensure_db_setup()
+        metadata_parameter = self.filter_to_query(filter)
+        del_result = await self.astra_env.async_collection.delete_many(
+            filter=metadata_parameter,
+        )
+        if del_result.deleted_count is not None and del_result.deleted_count >= 0:
+            return del_result.deleted_count
+        return None
+
     def delete_collection(self) -> None:
         """Completely delete the collection from the database.
 
diff --git a/libs/astradb/tests/integration_tests/test_vectorstore.py b/libs/astradb/tests/integration_tests/test_vectorstore.py
index 721d033..277cd3c 100644
--- a/libs/astradb/tests/integration_tests/test_vectorstore.py
+++ b/libs/astradb/tests/integration_tests/test_vectorstore.py
@@ -819,6 +819,60 @@ async def test_astradb_vectorstore_massive_insert_replace_async(
         for doc, _, doc_id in full_results:
             assert doc.page_content == expected_text_by_id[doc_id]
 
+    def test_astradb_vectorstore_delete_by_metadata_sync(
+        self,
+        vector_store_d2: AstraDBVectorStore,
+    ) -> None:
+        """Testing delete_by_metadata_filter."""
+        full_size = 400
+        # one in ... will be deleted
+        deletee_ratio = 3
+
+        documents = [
+            Document(
+                page_content="[1,1]", metadata={"deletee": doc_i % deletee_ratio == 0}
+            )
+            for doc_i in range(full_size)
+        ]
+
+        inserted_ids0 = vector_store_d2.add_documents(documents)
+        assert len(inserted_ids0) == len(documents)
+
+        d_result0 = vector_store_d2.delete_by_metadata_filter({"deletee": True})
+        assert d_result0 is not None
+        assert d_result0 == len([doc for doc in documents if doc.metadata["deletee"]])
+
+        d_result1 = vector_store_d2.delete_by_metadata_filter({})
+        assert d_result1 is None
+        assert len(vector_store_d2.similarity_search("[1,1]", k=1)) == 0
+
+    async def test_astradb_vectorstore_delete_by_metadata_async(
+        self,
+        vector_store_d2: AstraDBVectorStore,
+    ) -> None:
+        """Testing delete_by_metadata_filter, async version."""
+        full_size = 400
+        # one in ... will be deleted
+        deletee_ratio = 3
+
+        documents = [
+            Document(
+                page_content="[1,1]", metadata={"deletee": doc_i % deletee_ratio == 0}
+            )
+            for doc_i in range(full_size)
+        ]
+
+        inserted_ids0 = await vector_store_d2.aadd_documents(documents)
+        assert len(inserted_ids0) == len(documents)
+
+        d_result0 = await vector_store_d2.adelete_by_metadata_filter({"deletee": True})
+        assert d_result0 is not None
+        assert d_result0 == len([doc for doc in documents if doc.metadata["deletee"]])
+
+        d_result1 = await vector_store_d2.adelete_by_metadata_filter({})
+        assert d_result1 is None
+        assert len(await vector_store_d2.asimilarity_search("[1,1]", k=1)) == 0
+
     def test_astradb_vectorstore_mmr_sync(
         self,
         vector_store_d2: AstraDBVectorStore,
diff --git a/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py b/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py
index 60caf47..986de7a 100644
--- a/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py
+++ b/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py
@@ -92,6 +92,13 @@ def test_autodetect_flat_novectorize_crud(
         results2 = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
         assert results2 == [Document(id=id4, page_content=pc4, metadata=md4)]
 
+        # delete by metadata
+        del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
+        assert del_by_md is not None
+        assert del_by_md == 1
+        results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
+        assert results2n == []
+
     def test_autodetect_default_novectorize_crud(
         self,
         astra_db_credentials: AstraDBCredentials,
@@ -148,6 +155,13 @@ def test_autodetect_default_novectorize_crud(
         results2 = ad_store.similarity_search("[9,10]", k=3, filter={"q2": "Q2"})
         assert results2 == [Document(id=id4, page_content=pc4, metadata=md4)]
 
+        # delete by metadata
+        del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
+        assert del_by_md is not None
+        assert del_by_md == 1
+        results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
+        assert results2n == []
+
     def test_autodetect_flat_vectorize_crud(
         self,
         astra_db_credentials: AstraDBCredentials,
@@ -208,6 +222,13 @@ def test_autodetect_flat_vectorize_crud(
         results2 = ad_store.similarity_search("query", k=3, filter={"q2": "Q2"})
         assert results2 == [Document(id=id4, page_content=pc4, metadata=md4)]
 
+        # delete by metadata
+        del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
+        assert del_by_md is not None
+        assert del_by_md == 1
+        results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
+        assert results2n == []
+
     def test_autodetect_default_vectorize_crud(
         self,
         *,
@@ -266,6 +287,13 @@ def test_autodetect_default_vectorize_crud(
         results2 = ad_store.similarity_search("query", k=3, filter={"q2": "Q2"})
         assert results2 == [Document(id=id4, page_content=pc4, metadata=md4)]
 
+        # delete by metadata
+        del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
+        assert del_by_md is not None
+        assert del_by_md == 1
+        results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
+        assert results2n == []
+
     def test_failed_docs_autodetect_flat_novectorize_crud(
         self,
         astra_db_credentials: AstraDBCredentials,

From ef1992f510dbdeefc4ae2d5dd3d11bee17f77b77 Mon Sep 17 00:00:00 2001
From: Stefano Lottini <stefano.lottini@datastax.com>
Date: Wed, 2 Oct 2024 00:47:26 +0200
Subject: [PATCH 2/3] added [a]update_metadata + tests

---
 .../astradb/langchain_astradb/vectorstores.py | 104 +++++++++++++++++
 .../integration_tests/test_vectorstore.py     | 106 ++++++++++++++++++
 .../test_vectorstore_autodetect.py            |  52 +++++++++
 3 files changed, 262 insertions(+)

diff --git a/libs/astradb/langchain_astradb/vectorstores.py b/libs/astradb/langchain_astradb/vectorstores.py
index c63452b..5d6ab39 100644
--- a/libs/astradb/langchain_astradb/vectorstores.py
+++ b/libs/astradb/langchain_astradb/vectorstores.py
@@ -1222,6 +1222,110 @@ async def _replace_document(
                 raise ValueError(msg)
         return inserted_ids
 
+    def update_metadata(
+        self,
+        id_to_metadata: dict[str, dict],
+        *,
+        overwrite_concurrency: int | None = None,
+    ) -> int:
+        """Add/overwrite the metadata of existing documents.
+
+        For each document to update, the new metadata dictionary is added
+        to the existing metadata, overwriting individual keys that existed already.
+
+        Args:
+            id_to_metadata: map from the Document IDs to modify to the
+                new metadata for updating. Keys in this dictionary that
+                do not correspond to an existing document will be silently ignored.
+                The values of this map are metadata dictionaries for updating
+                the documents. Any pre-existing metadata will be merged with
+                these entries, which take precedence on a key-by-key basis.
+            overwrite_concurrency: number of threads to process the updates
+                Defaults to the vector-store overall setting if not provided.
+
+        Returns:
+            the number of documents successfully updated (i.e. found to exist,
+            since even an update with `{}` as the new metadata counts as successful.)
+        """
+        self.astra_env.ensure_db_setup()
+
+        _max_workers = overwrite_concurrency or self.bulk_insert_overwrite_concurrency
+        with ThreadPoolExecutor(
+            max_workers=_max_workers,
+        ) as executor:
+
+            def _update_document(
+                id_md_pair: tuple[str, dict],
+            ) -> UpdateResult:
+                document_id, update_metadata = id_md_pair
+                encoded_metadata = self.filter_to_query(update_metadata)
+                return self.astra_env.collection.update_one(
+                    {"_id": document_id},
+                    {"$set": encoded_metadata},
+                )
+
+            update_results = list(
+                executor.map(
+                    _update_document,
+                    id_to_metadata.items(),
+                )
+            )
+
+        return sum(u_res.update_info["n"] for u_res in update_results)
+
+    async def aupdate_metadata(
+        self,
+        id_to_metadata: dict[str, dict],
+        *,
+        overwrite_concurrency: int | None = None,
+    ) -> int:
+        """Add/overwrite the metadata of existing documents.
+
+        For each document to update, the new metadata dictionary is added
+        to the existing metadata, overwriting individual keys that existed already.
+
+        Args:
+            id_to_metadata: map from the Document IDs to modify to the
+                new metadata for updating. Keys in this dictionary that
+                do not correspond to an existing document will be silently ignored.
+                The values of this map are metadata dictionaries for updating
+                the documents. Any pre-existing metadata will be merged with
+                these entries, which take precedence on a key-by-key basis.
+            overwrite_concurrency: number of threads to process the updates
+                Defaults to the vector-store overall setting if not provided.
+
+        Returns:
+            the number of documents successfully updated (i.e. found to exist,
+            since even an update with `{}` as the new metadata counts as successful.)
+        """
+        await self.astra_env.aensure_db_setup()
+
+        sem = asyncio.Semaphore(
+            overwrite_concurrency or self.bulk_insert_overwrite_concurrency,
+        )
+
+        _async_collection = self.astra_env.async_collection
+
+        async def _update_document(
+            id_md_pair: tuple[str, dict],
+        ) -> UpdateResult:
+            document_id, update_metadata = id_md_pair
+            encoded_metadata = self.filter_to_query(update_metadata)
+            async with sem:
+                return await _async_collection.update_one(
+                    {"_id": document_id},
+                    {"$set": encoded_metadata},
+                )
+
+        tasks = [
+            asyncio.create_task(_update_document(id_md_pair))
+            for id_md_pair in id_to_metadata.items()
+        ]
+
+        update_results = await asyncio.gather(*tasks, return_exceptions=False)
+
+        return sum(u_res.update_info["n"] for u_res in update_results)
+
     @override
     def similarity_search(
         self,
diff --git a/libs/astradb/tests/integration_tests/test_vectorstore.py b/libs/astradb/tests/integration_tests/test_vectorstore.py
index 277cd3c..98646cf 100644
--- a/libs/astradb/tests/integration_tests/test_vectorstore.py
+++ b/libs/astradb/tests/integration_tests/test_vectorstore.py
@@ -873,6 +873,112 @@ async def test_astradb_vectorstore_delete_by_metadata_async(
         assert d_result1 is None
         assert len(await vector_store_d2.asimilarity_search("[1,1]", k=1)) == 0
 
+    def test_astradb_vectorstore_update_metadata_sync(
+        self,
+        vector_store_d2: AstraDBVectorStore,
+    ) -> None:
+        """Testing update_metadata."""
+        # this should not exceed the max number of hits from ANN search
+        full_size = 20
+        # one in ... will be updated
+        updatee_ratio = 2
+        # set this to lower than full_size // updatee_ratio to test everything.
+        update_concurrency = 7
+
+        def doc_sorter(doc: Document) -> str:
+            return doc.id or ""
+
+        orig_documents0 = [
+            Document(
+                page_content="[1,1]",
+                metadata={
+                    "to_update": doc_i % updatee_ratio == 0,
+                    "inert_field": "I",
+                    "updatee_field": "0",
+                },
+                id=f"um_doc_{doc_i}",
+            )
+            for doc_i in range(full_size)
+        ]
+        orig_documents = sorted(orig_documents0, key=doc_sorter)
+
+        inserted_ids0 = vector_store_d2.add_documents(orig_documents)
+        assert len(inserted_ids0) == len(orig_documents)
+
+        update_map = {
+            f"um_doc_{doc_i}": {"updatee_field": "1", "to_update": False}
+            for doc_i in range(full_size)
+            if doc_i % updatee_ratio == 0
+        }
+        u_result0 = vector_store_d2.update_metadata(
+            update_map,
+            overwrite_concurrency=update_concurrency,
+        )
+        assert u_result0 == len(update_map)
+
+        all_documents = sorted(
+            vector_store_d2.similarity_search("[1,1]", k=full_size),
+            key=doc_sorter,
+        )
+        assert len(all_documents) == len(orig_documents)
+        for doc, orig_doc in zip(all_documents, orig_documents):
+            assert doc.id == orig_doc.id
+            if doc.id in update_map:
+                assert doc.metadata == orig_doc.metadata | update_map[doc.id]
+
+    async def test_astradb_vectorstore_update_metadata_async(
+        self,
+        vector_store_d2: AstraDBVectorStore,
+    ) -> None:
+        """Testing update_metadata, async version."""
+        # this should not exceed the max number of hits from ANN search
+        full_size = 20
+        # one in ... will be updated
+        updatee_ratio = 2
+        # set this to lower than full_size // updatee_ratio to test everything.
+        update_concurrency = 7
+
+        def doc_sorter(doc: Document) -> str:
+            return doc.id or ""
+
+        orig_documents0 = [
+            Document(
+                page_content="[1,1]",
+                metadata={
+                    "to_update": doc_i % updatee_ratio == 0,
+                    "inert_field": "I",
+                    "updatee_field": "0",
+                },
+                id=f"um_doc_{doc_i}",
+            )
+            for doc_i in range(full_size)
+        ]
+        orig_documents = sorted(orig_documents0, key=doc_sorter)
+
+        inserted_ids0 = await vector_store_d2.aadd_documents(orig_documents)
+        assert len(inserted_ids0) == len(orig_documents)
+
+        update_map = {
+            f"um_doc_{doc_i}": {"updatee_field": "1", "to_update": False}
+            for doc_i in range(full_size)
+            if doc_i % updatee_ratio == 0
+        }
+        u_result0 = await vector_store_d2.aupdate_metadata(
+            update_map,
+            overwrite_concurrency=update_concurrency,
+        )
+        assert u_result0 == len(update_map)
+
+        all_documents = sorted(
+            await vector_store_d2.asimilarity_search("[1,1]", k=full_size),
+            key=doc_sorter,
+        )
+        assert len(all_documents) == len(orig_documents)
+        for doc, orig_doc in zip(all_documents, orig_documents):
+            assert doc.id == orig_doc.id
+            if doc.id in update_map:
+                assert doc.metadata == orig_doc.metadata | update_map[doc.id]
+
     def test_astradb_vectorstore_mmr_sync(
         self,
         vector_store_d2: AstraDBVectorStore,
diff --git a/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py b/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py
index 986de7a..94f84f8 100644
--- a/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py
+++ b/libs/astradb/tests/integration_tests/test_vectorstore_autodetect.py
@@ -99,6 +99,19 @@ def test_autodetect_flat_novectorize_crud(
         results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
         assert results2n == []
 
+        def doc_sorter(doc: Document) -> str:
+            return doc.id or ""
+
+        # update metadata
+        ad_store.update_metadata(
+            {"1": {"m1": "A", "mZ": "Z"}, "2": {"m1": "B", "mZ": "Z"}}
+        )
+        matches_z = ad_store.similarity_search("[-1,-1]", k=3, filter={"mZ": "Z"})
+        assert len(matches_z) == 2
+        s_matches_z = sorted(matches_z, key=doc_sorter)
+        assert s_matches_z[0].metadata == {"m1": "A", "m2": "x", "mZ": "Z"}
+        assert s_matches_z[1].metadata == {"m1": "B", "m2": "y", "mZ": "Z"}
+
     def test_autodetect_default_novectorize_crud(
         self,
         astra_db_credentials: AstraDBCredentials,
@@ -162,6 +175,19 @@ def test_autodetect_default_novectorize_crud(
         results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
         assert results2n == []
 
+        def doc_sorter(doc: Document) -> str:
+            return doc.id or ""
+
+        # update metadata
+        ad_store.update_metadata(
+            {"1": {"m1": "A", "mZ": "Z"}, "2": {"m1": "B", "mZ": "Z"}}
+        )
+        matches_z = ad_store.similarity_search("[-1,-1]", k=3, filter={"mZ": "Z"})
+        assert len(matches_z) == 2
+        s_matches_z = sorted(matches_z, key=doc_sorter)
+        assert s_matches_z[0].metadata == {"m1": "A", "m2": "x", "mZ": "Z"}
+        assert s_matches_z[1].metadata == {"m1": "B", "m2": "y", "mZ": "Z"}
+
     def test_autodetect_flat_vectorize_crud(
         self,
         astra_db_credentials: AstraDBCredentials,
@@ -229,6 +255,19 @@ def test_autodetect_flat_vectorize_crud(
         results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
         assert results2n == []
 
+        def doc_sorter(doc: Document) -> str:
+            return doc.id or ""
+
+        # update metadata
+        ad_store.update_metadata(
+            {"1": {"m1": "A", "mZ": "Z"}, "2": {"m1": "B", "mZ": "Z"}}
+        )
+        matches_z = ad_store.similarity_search("[-1,-1]", k=3, filter={"mZ": "Z"})
+        assert len(matches_z) == 2
+        s_matches_z = sorted(matches_z, key=doc_sorter)
+        assert s_matches_z[0].metadata == {"m1": "A", "m2": "x", "mZ": "Z"}
+        assert s_matches_z[1].metadata == {"m1": "B", "m2": "y", "mZ": "Z"}
+
     def test_autodetect_default_vectorize_crud(
         self,
         *,
@@ -294,6 +333,19 @@ def test_autodetect_default_vectorize_crud(
         results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
         assert results2n == []
 
+        def doc_sorter(doc: Document) -> str:
+            return doc.id or ""
+
+        # update metadata
+        ad_store.update_metadata(
+            {"1": {"m1": "A", "mZ": "Z"}, "2": {"m1": "B", "mZ": "Z"}}
+        )
+        matches_z = ad_store.similarity_search("[-1,-1]", k=3, filter={"mZ": "Z"})
+        assert len(matches_z) == 2
+        s_matches_z = sorted(matches_z, key=doc_sorter)
+        assert s_matches_z[0].metadata == {"m1": "A", "m2": "x", "mZ": "Z"}
+        assert s_matches_z[1].metadata == {"m1": "B", "m2": "y", "mZ": "Z"}
+
     def test_failed_docs_autodetect_flat_novectorize_crud(
         self,
         astra_db_credentials: AstraDBCredentials,

From b7d843693558856ce5b78db303895388808228e3 Mon Sep 17 00:00:00 2001
From: Stefano Lottini <stefano.lottini@datastax.com>
Date: Thu, 3 Oct 2024 10:41:43 +0200
Subject: [PATCH 3/3] [a]delete_by_metadata refuses empty filters + adjusted
 tests

---
 .../astradb/langchain_astradb/vectorstores.py | 42 ++++++++++---------
 .../integration_tests/test_vectorstore.py     | 34 ++++++++++-----
 2 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/libs/astradb/langchain_astradb/vectorstores.py b/libs/astradb/langchain_astradb/vectorstores.py
index 5d6ab39..84b2083 100644
--- a/libs/astradb/langchain_astradb/vectorstores.py
+++ b/libs/astradb/langchain_astradb/vectorstores.py
@@ -830,58 +830,60 @@ async def adelete(
     def delete_by_metadata_filter(
         self,
         filter: dict[str, Any],  # noqa: A002
-    ) -> int | None:
+    ) -> int:
         """Delete all documents matching a certain metadata filtering condition.
 
         This operation does not use the vector embeddings in any way, it simply
         removes all documents whose metadata match the provided condition.
-        Use with caution: passing an empty filter dictionary results in
-        completely emptying the vector store.
 
         Args:
-            filter: Filter on the metadata to apply.
+            filter: Filter on the metadata to apply. The filter cannot be empty.
 
         Returns:
             An number expressing the amount of deleted documents.
-            This will be None if a `{}` metadata filter condition is passed,
-            implying emptying the store entirely.
         """
+        if not filter:
+            msg = (
+                "Method `delete_by_metadata_filter` does not accept an empty "
+                "filter. Use the `clear()` method if you really want to empty "
+                "the vector store."
+            )
+            raise ValueError(msg)
         self.astra_env.ensure_db_setup()
         metadata_parameter = self.filter_to_query(filter)
         del_result = self.astra_env.collection.delete_many(
             filter=metadata_parameter,
         )
-        if del_result.deleted_count is not None and del_result.deleted_count >= 0:
-            return del_result.deleted_count
-        return None
+        return del_result.deleted_count or 0
 
     async def adelete_by_metadata_filter(
         self,
         filter: dict[str, Any],  # noqa: A002
-    ) -> int | None:
+    ) -> int:
         """Delete all documents matching a certain metadata filtering condition.
 
         This operation does not use the vector embeddings in any way, it simply
         removes all documents whose metadata match the provided condition.
-        Use with caution: passing an empty filter dictionary results in
-        completely emptying the vector store.
 
         Args:
-            filter: Filter on the metadata to apply.
+            filter: Filter on the metadata to apply. The filter cannot be empty.
 
         Returns:
             An number expressing the amount of deleted documents.
-            This will be None if a `{}` metadata filter condition is passed,
-            implying emptying the store entirely.
         """
+        if not filter:
+            msg = (
+                "Method `delete_by_metadata_filter` does not accept an empty "
+                "filter. Use the `clear()` method if you really want to empty "
+                "the vector store."
+            )
+            raise ValueError(msg)
         await self.astra_env.aensure_db_setup()
         metadata_parameter = self.filter_to_query(filter)
         del_result = await self.astra_env.async_collection.delete_many(
             filter=metadata_parameter,
         )
-        if del_result.deleted_count is not None and del_result.deleted_count >= 0:
-            return del_result.deleted_count
-        return None
+        return del_result.deleted_count or 0
 
     def delete_collection(self) -> None:
         """Completely delete the collection from the database.
@@ -1230,7 +1232,7 @@ def update_metadata(
     ) -> int:
         """Add/overwrite the metadata of existing documents.
 
-        For each document to update, the new metadata dictionary is added
+        For each document to update, the new metadata dictionary is appended
         to the existing metadata, overwriting individual keys that existed already.
 
         Args:
@@ -1281,7 +1283,7 @@ async def aupdate_metadata(
     ) -> int:
         """Add/overwrite the metadata of existing documents.
 
-        For each document to update, the new metadata dictionary is added
+        For each document to update, the new metadata dictionary is appended
         to the existing metadata, overwriting individual keys that existed already.
 
         Args:
diff --git a/libs/astradb/tests/integration_tests/test_vectorstore.py b/libs/astradb/tests/integration_tests/test_vectorstore.py
index 98646cf..9b5b2b2 100644
--- a/libs/astradb/tests/integration_tests/test_vectorstore.py
+++ b/libs/astradb/tests/integration_tests/test_vectorstore.py
@@ -834,17 +834,24 @@ def test_astradb_vectorstore_delete_by_metadata_sync(
             )
             for doc_i in range(full_size)
         ]
+        num_deletees = len([doc for doc in documents if doc.metadata["deletee"]])
 
         inserted_ids0 = vector_store_d2.add_documents(documents)
         assert len(inserted_ids0) == len(documents)
 
         d_result0 = vector_store_d2.delete_by_metadata_filter({"deletee": True})
-        assert d_result0 is not None
-        assert d_result0 == len([doc for doc in documents if doc.metadata["deletee"]])
+        assert d_result0 == num_deletees
+        count_on_store0 = len(
+            vector_store_d2.similarity_search("[1,1]", k=full_size + 1)
+        )
+        assert count_on_store0 == full_size - num_deletees
 
-        d_result1 = vector_store_d2.delete_by_metadata_filter({})
-        assert d_result1 is None
-        assert len(vector_store_d2.similarity_search("[1,1]", k=1)) == 0
+        with pytest.raises(ValueError, match="does not accept an empty"):
+            vector_store_d2.delete_by_metadata_filter({})
+        count_on_store1 = len(
+            vector_store_d2.similarity_search("[1,1]", k=full_size + 1)
+        )
+        assert count_on_store1 == full_size - num_deletees
 
     async def test_astradb_vectorstore_delete_by_metadata_async(
         self,
@@ -861,17 +868,24 @@ async def test_astradb_vectorstore_delete_by_metadata_async(
             )
             for doc_i in range(full_size)
         ]
+        num_deletees = len([doc for doc in documents if doc.metadata["deletee"]])
 
         inserted_ids0 = await vector_store_d2.aadd_documents(documents)
         assert len(inserted_ids0) == len(documents)
 
         d_result0 = await vector_store_d2.adelete_by_metadata_filter({"deletee": True})
-        assert d_result0 is not None
-        assert d_result0 == len([doc for doc in documents if doc.metadata["deletee"]])
+        assert d_result0 == num_deletees
+        count_on_store0 = len(
+            await vector_store_d2.asimilarity_search("[1,1]", k=full_size + 1)
+        )
+        assert count_on_store0 == full_size - num_deletees
 
-        d_result1 = await vector_store_d2.adelete_by_metadata_filter({})
-        assert d_result1 is None
-        assert len(await vector_store_d2.asimilarity_search("[1,1]", k=1)) == 0
+        with pytest.raises(ValueError, match="does not accept an empty"):
+            await vector_store_d2.adelete_by_metadata_filter({})
+        count_on_store1 = len(
+            await vector_store_d2.asimilarity_search("[1,1]", k=full_size + 1)
+        )
+        assert count_on_store1 == full_size - num_deletees
 
     def test_astradb_vectorstore_update_metadata_sync(
         self,