From d90e3e8ca357090f67f5f9236cd65004280143c4 Mon Sep 17 00:00:00 2001 From: Fatih Acar Date: Sat, 25 Jan 2025 00:27:06 +0100 Subject: [PATCH 1/6] fix(backend): use explicit where clause instead of all() 10x speed-up. It would also consume too much memory and startup would fail when heap size is not enough. Signed-off-by: Fatih Acar --- backend/infrahub/core/query/node.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/backend/infrahub/core/query/node.py b/backend/infrahub/core/query/node.py index 37985a8275..c8413d28ea 100644 --- a/backend/infrahub/core/query/node.py +++ b/backend/infrahub/core/query/node.py @@ -479,11 +479,17 @@ async def query_init(self, db: InfrahubDatabase, **kwargs) -> None: self.return_labels = ["n", "a", "av", "r1", "r2"] # Add Is_Protected and Is_visible + rel_isv_branch_filter, _ = self.branch.get_query_filter_path( + at=self.at, branch_agnostic=self.branch_agnostic, variable_name="rel_isv" + ) + rel_isp_branch_filter, _ = self.branch.get_query_filter_path( + at=self.at, branch_agnostic=self.branch_agnostic, variable_name="rel_isp" + ) query = """ MATCH (a)-[rel_isv:IS_VISIBLE]-(isv:Boolean) MATCH (a)-[rel_isp:IS_PROTECTED]-(isp:Boolean) - WHERE all(r IN [rel_isv, rel_isp] WHERE ( %(branch_filter)s )) - """ % {"branch_filter": branch_filter} + WHERE (%(rel_isv_branch_filter)s) AND (%(rel_isp_branch_filter)s) + """ % {"rel_isv_branch_filter": rel_isv_branch_filter, "rel_isp_branch_filter": rel_isp_branch_filter} self.add_to_query(query) self.return_labels.extend(["isv", "isp", "rel_isv", "rel_isp"]) From 738a42945bd51e884972901715c73898b100731c Mon Sep 17 00:00:00 2001 From: Fatih Acar Date: Sun, 26 Jan 2025 00:52:46 +0100 Subject: [PATCH 2/6] fix(backend): do not use old uniqueness constraint filter It adds a significant overhead since it uses NodeGetListQuery. Signed-off-by: Fatih Acar --- .../dependencies/builder/constraint/grouped/node_runner.py | 2 -- backend/tests/unit/graphql/test_mutation_create.py | 6 +++--- backend/tests/unit/graphql/test_mutation_update.py | 2 +- backend/tests/unit/graphql/test_mutation_upsert.py | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/backend/infrahub/dependencies/builder/constraint/grouped/node_runner.py b/backend/infrahub/dependencies/builder/constraint/grouped/node_runner.py index 8c082de1df..2a495e079b 100644 --- a/backend/infrahub/dependencies/builder/constraint/grouped/node_runner.py +++ b/backend/infrahub/dependencies/builder/constraint/grouped/node_runner.py @@ -2,7 +2,6 @@ from infrahub.dependencies.interface import DependencyBuilder, DependencyBuilderContext from ..node.grouped_uniqueness import NodeGroupedUniquenessConstraintDependency -from ..node.uniqueness import NodeAttributeUniquenessConstraintDependency from ..relationship_manager.count import RelationshipCountConstraintDependency from ..relationship_manager.peer_kind import RelationshipPeerKindConstraintDependency from ..relationship_manager.profiles_kind import RelationshipProfilesKindConstraintDependency @@ -15,7 +14,6 @@ def build(cls, context: DependencyBuilderContext) -> NodeConstraintRunner: db=context.db, branch=context.branch, node_constraints=[ - NodeAttributeUniquenessConstraintDependency.build(context=context), NodeGroupedUniquenessConstraintDependency.build(context=context), ], relationship_manager_constraints=[ diff --git a/backend/tests/unit/graphql/test_mutation_create.py b/backend/tests/unit/graphql/test_mutation_create.py index a7d242a49c..57fe7b833b 100644 --- a/backend/tests/unit/graphql/test_mutation_create.py +++ b/backend/tests/unit/graphql/test_mutation_create.py @@ -140,7 +140,7 @@ async def test_create_check_unique(db: InfrahubDatabase, default_branch, car_per assert result.errors assert len(result.errors) == 1 - assert "An object already exist" in result.errors[0].message + assert "Violates uniqueness constraint" in result.errors[0].message async def test_create_check_unique_across_branch(db: InfrahubDatabase, default_branch, car_person_schema): @@ -172,7 +172,7 @@ async def test_create_check_unique_across_branch(db: InfrahubDatabase, default_b assert result.errors assert len(result.errors) == 1 - assert "An object already exist" in result.errors[0].message + assert "Violates uniqueness constraint" in result.errors[0].message async def test_create_check_unique_in_branch(db: InfrahubDatabase, default_branch, car_person_schema): @@ -203,7 +203,7 @@ async def test_create_check_unique_in_branch(db: InfrahubDatabase, default_branc assert result.errors assert len(result.errors) == 1 - assert "An object already exist" in result.errors[0].message + assert "Violates uniqueness constraint" in result.errors[0].message async def test_all_attributes(db: InfrahubDatabase, default_branch, all_attribute_types_schema): diff --git a/backend/tests/unit/graphql/test_mutation_update.py b/backend/tests/unit/graphql/test_mutation_update.py index 7cb65336fb..c1956d0580 100644 --- a/backend/tests/unit/graphql/test_mutation_update.py +++ b/backend/tests/unit/graphql/test_mutation_update.py @@ -174,7 +174,7 @@ async def test_update_check_unique(db: InfrahubDatabase, person_john_main: Node, assert result.errors assert len(result.errors) == 1 - assert "An object already exist" in result.errors[0].message + assert "Violates uniqueness constraint" in result.errors[0].message async def test_update_object_with_flag_property(db: InfrahubDatabase, person_john_main: Node, branch: Branch): diff --git a/backend/tests/unit/graphql/test_mutation_upsert.py b/backend/tests/unit/graphql/test_mutation_upsert.py index 51dab4b77e..838faaaaff 100644 --- a/backend/tests/unit/graphql/test_mutation_upsert.py +++ b/backend/tests/unit/graphql/test_mutation_upsert.py @@ -204,7 +204,7 @@ async def test_update_by_id_to_nonunique_value_raises_error( variable_values={}, ) - expected_error = "An object already exist with this value: name: Jim at name" + expected_error = "Violates uniqueness constraint 'name' at name" assert any(expected_error in error.message for error in result.errors) From 9f7c938e2bdc2d441b29ad2b37db3cfe71d603b9 Mon Sep 17 00:00:00 2001 From: Fatih Acar Date: Wed, 29 Jan 2025 11:39:24 +0100 Subject: [PATCH 3/6] fix(backend): improve relationship_count_per_node query Using an early filter. Signed-off-by: Fatih Acar --- backend/infrahub/core/query/relationship.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/infrahub/core/query/relationship.py b/backend/infrahub/core/query/relationship.py index dd28a53766..3b66dc6469 100644 --- a/backend/infrahub/core/query/relationship.py +++ b/backend/infrahub/core/query/relationship.py @@ -905,7 +905,8 @@ async def query_init(self, db: InfrahubDatabase, **kwargs) -> None: path = "<-[r:IS_RELATED]-" query = """ - MATCH (rl:Relationship { name: $rel_identifier }) + MATCH (peer_node:Node)%(path)s(rl:Relationship { name: $rel_identifier }) + WHERE peer_node.uuid IN $peer_ids AND %(branch_filter)s CALL { WITH rl MATCH path = (peer_node:Node)%(path)s(rl) From 24e76f83763a887fa562d2c65e34eff45d663525 Mon Sep 17 00:00:00 2001 From: Fatih Acar Date: Wed, 29 Jan 2025 11:40:32 +0100 Subject: [PATCH 4/6] fix(backend): improve node_uniqueness_constraints query By not running a subquery for each row. This should improve scaling this query when node count grows. Signed-off-by: Fatih Acar --- .../core/validators/uniqueness/query.py | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/backend/infrahub/core/validators/uniqueness/query.py b/backend/infrahub/core/validators/uniqueness/query.py index de63c9cd7f..189dc64b1d 100644 --- a/backend/infrahub/core/validators/uniqueness/query.py +++ b/backend/infrahub/core/validators/uniqueness/query.py @@ -94,29 +94,26 @@ async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: ) attr_paths_subquery = """ - WITH start_node - MATCH attr_path = (start_node)-[:HAS_ATTRIBUTE]->(attr:Attribute)-[r:HAS_VALUE]->(attr_value:AttributeValue) + MATCH attr_path = (start_node:%(node_kind)s)-[:HAS_ATTRIBUTE]->(attr:Attribute)-[r:HAS_VALUE]->(attr_value:AttributeValue) WHERE attr.name in $attribute_names AND ([attr.name, type(r)] in $attr_paths OR [attr.name, type(r), attr_value.value] in $attr_paths_with_value) - RETURN attr_path as potential_path, NULL as rel_identifier, attr.name as potential_attr, attr_value.value as potential_attr_value - """ + RETURN start_node, attr_path as potential_path, NULL as rel_identifier, attr.name as potential_attr, attr_value.value as potential_attr_value + """ % {"node_kind": self.query_request.kind} relationship_attr_paths_with_value_subquery = """ - WITH start_node - MATCH rel_path = (start_node)-[:IS_RELATED]-(relationship_node:Relationship)-[:IS_RELATED]-(related_n:Node)-[:HAS_ATTRIBUTE]->(rel_attr:Attribute)-[:HAS_VALUE]->(rel_attr_value:AttributeValue) + MATCH rel_path = (start_node:%(node_kind)s)-[:IS_RELATED]-(relationship_node:Relationship)-[:IS_RELATED]-(related_n:Node)-[:HAS_ATTRIBUTE]->(rel_attr:Attribute)-[:HAS_VALUE]->(rel_attr_value:AttributeValue) WHERE relationship_node.name in $relationship_names AND ([relationship_node.name, rel_attr.name] in $relationship_attr_paths OR [relationship_node.name, rel_attr.name, rel_attr_value.value] in $relationship_attr_paths_with_value) - RETURN rel_path as potential_path, relationship_node.name as rel_identifier, rel_attr.name as potential_attr, rel_attr_value.value as potential_attr_value - """ + RETURN start_node, rel_path as potential_path, relationship_node.name as rel_identifier, rel_attr.name as potential_attr, rel_attr_value.value as potential_attr_value + """ % {"node_kind": self.query_request.kind} relationship_only_attr_paths_subquery = """ - WITH start_node - MATCH rel_path = (start_node)-[:IS_RELATED]-(relationship_node:Relationship)-[:IS_RELATED]-(related_n:Node) + MATCH rel_path = (start_node:%(node_kind)s)-[:IS_RELATED]-(relationship_node:Relationship)-[:IS_RELATED]-(related_n:Node) WHERE relationship_node.name in $relationship_only_attr_paths - RETURN rel_path as potential_path, relationship_node.name as rel_identifier, "id" as potential_attr, related_n.uuid as potential_attr_value - """ + RETURN start_node, rel_path as potential_path, relationship_node.name as rel_identifier, "id" as potential_attr, related_n.uuid as potential_attr_value + """ % {"node_kind": self.query_request.kind} select_subqueries = [] if attr_paths or attr_paths_with_value: @@ -130,8 +127,6 @@ async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: # ruff: noqa: E501 query = """ - // group by node - MATCH (start_node:%(node_kind)s) // get attributes for node and its relationships CALL { %(select_subqueries_str)s @@ -201,7 +196,6 @@ async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: attr_value, relationship_identifier """ % { - "node_kind": self.query_request.kind, "select_subqueries_str": select_subqueries_str, "branch_filter": branch_filter, "from_times": from_times, From c2acdfd7fd642b9f90e0105b140969eca1c5670d Mon Sep 17 00:00:00 2001 From: Fatih Acar Date: Fri, 31 Jan 2025 22:50:40 +0100 Subject: [PATCH 5/6] fix(backend): early filter on uniqueness constraint query We can filter out relationship constraints by node uuids. Signed-off-by: Fatih Acar --- .../core/node/constraints/grouped_uniqueness.py | 11 +++++++++-- .../infrahub/core/validators/uniqueness/query.py | 15 ++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/backend/infrahub/core/node/constraints/grouped_uniqueness.py b/backend/infrahub/core/node/constraints/grouped_uniqueness.py index db60e86f07..c1c1b471a8 100644 --- a/backend/infrahub/core/node/constraints/grouped_uniqueness.py +++ b/backend/infrahub/core/node/constraints/grouped_uniqueness.py @@ -35,7 +35,7 @@ def __init__(self, db: InfrahubDatabase, branch: Branch) -> None: self.branch = branch self.schema_branch = registry.schema.get_schema_branch(branch.name) - def _build_query_request( + async def _build_query_request( self, updated_node: Node, node_schema: MainSchemaTypes, @@ -51,9 +51,16 @@ def _build_query_request( if attribute_path.related_schema and attribute_path.relationship_schema: if filters and attribute_path.relationship_schema.name in filters: include_in_query = True + + relationship_manager: RelationshipManager = getattr( + updated_node, attribute_path.relationship_schema.name + ) + related_node = await relationship_manager.get_peer(db=self.db) + related_node_id = related_node.get_id() if related_node else None query_relationship_paths.add( QueryRelationshipAttributePath( identifier=attribute_path.relationship_schema.get_identifier(), + value=related_node_id, ) ) continue @@ -158,7 +165,7 @@ async def _check_one_schema( ) -> None: schema_branch = self.db.schema.get_schema_branch(name=self.branch.name) path_groups = node_schema.get_unique_constraint_schema_attribute_paths(schema_branch=schema_branch) - query_request = self._build_query_request( + query_request = await self._build_query_request( updated_node=node, node_schema=node_schema, path_groups=path_groups, filters=filters ) if not query_request: diff --git a/backend/infrahub/core/validators/uniqueness/query.py b/backend/infrahub/core/validators/uniqueness/query.py index 189dc64b1d..990e8ae2ad 100644 --- a/backend/infrahub/core/validators/uniqueness/query.py +++ b/backend/infrahub/core/validators/uniqueness/query.py @@ -30,7 +30,7 @@ def __init__( def get_context(self) -> dict[str, str]: return {"kind": self.query_request.kind} - async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: + async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: # pylint: disable=too-many-branches branch_filter, branch_params = self.branch.get_query_filter_path(at=self.at.to_string(), is_isolated=False) self.params.update(branch_params) from_times = db.render_list_comprehension(items="relationships(potential_path)", item_name="from") @@ -56,6 +56,7 @@ async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: relationship_names = set() relationship_attr_paths = [] relationship_only_attr_paths = [] + relationship_only_attr_values = [] relationship_attr_paths_with_value = [] for rel_path in self.query_request.relationship_attribute_paths: relationship_names.add(rel_path.identifier) @@ -67,6 +68,8 @@ async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: relationship_attr_paths.append((rel_path.identifier, rel_path.attribute_name)) else: relationship_only_attr_paths.append(rel_path.identifier) + if rel_path.value: + relationship_only_attr_values.append(rel_path.value) if ( not attr_paths @@ -89,6 +92,7 @@ async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: "relationship_attr_paths": relationship_attr_paths, "relationship_attr_paths_with_value": relationship_attr_paths_with_value, "relationship_only_attr_paths": relationship_only_attr_paths, + "relationship_only_attr_values": relationship_only_attr_values, "min_count_required": self.min_count_required, } ) @@ -111,9 +115,14 @@ async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None: relationship_only_attr_paths_subquery = """ MATCH rel_path = (start_node:%(node_kind)s)-[:IS_RELATED]-(relationship_node:Relationship)-[:IS_RELATED]-(related_n:Node) - WHERE relationship_node.name in $relationship_only_attr_paths + WHERE %(rel_node_filter)s relationship_node.name in $relationship_only_attr_paths RETURN start_node, rel_path as potential_path, relationship_node.name as rel_identifier, "id" as potential_attr, related_n.uuid as potential_attr_value - """ % {"node_kind": self.query_request.kind} + """ % { + "node_kind": self.query_request.kind, + "rel_node_filter": "related_n.uuid IN $relationship_only_attr_values AND " + if relationship_only_attr_values + else "", + } select_subqueries = [] if attr_paths or attr_paths_with_value: From f002cd20171f4791989f7952a47e08f0b9aaa9f1 Mon Sep 17 00:00:00 2001 From: Aaron McCarty Date: Fri, 31 Jan 2025 17:10:54 -0800 Subject: [PATCH 6/6] one more unit test --- .../test_uniqueness_constraint_query.py | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/backend/tests/unit/core/constraint_validators/test_uniqueness_constraint_query.py b/backend/tests/unit/core/constraint_validators/test_uniqueness_constraint_query.py index 48d69ef8bd..1c3ebe5f7b 100644 --- a/backend/tests/unit/core/constraint_validators/test_uniqueness_constraint_query.py +++ b/backend/tests/unit/core/constraint_validators/test_uniqueness_constraint_query.py @@ -2,6 +2,7 @@ from infrahub.core.initialization import create_branch from infrahub.core.manager import NodeManager from infrahub.core.node import Node +from infrahub.core.schema.schema_branch import SchemaBranch from infrahub.core.validators.uniqueness.model import NodeUniquenessQueryRequest from infrahub.core.validators.uniqueness.query import NodeUniqueAttributeConstraintQuery from infrahub.database import InfrahubDatabase @@ -425,6 +426,81 @@ async def test_query_relationship_violation_no_attribute( assert serial_result in expected_result_dicts +async def test_query_relationship_no_violation_same_peer_different_rels( + db: InfrahubDatabase, default_branch: Branch, animal_person_schema: SchemaBranch +): + john = await Node.init(schema="TestPerson", db=db) + await john.new(db=db, name="John", height=175) + await john.save(db=db) + jane = await Node.init(schema="TestPerson", db=db) + await jane.new(db=db, name="Jane", height=165) + await jane.save(db=db) + johns_dog = await Node.init(schema="TestDog", db=db) + await johns_dog.new(db=db, name="J-dog", breed="mixed", owner=john, best_friend=jane) + await johns_dog.save(db=db) + jane_dog = await Node.init(schema="TestDog", db=db) + await jane_dog.new(db=db, name="Jane-dog", breed="mixed", owner=jane, best_friend=jane) + await jane_dog.save(db=db) + + branch = await create_branch(db=db, branch_name="branch") + joe = await Node.init(schema="TestPerson", db=db, branch=branch) + await joe.new(db=db, name="Joe", height=175) + await joe.save(db=db) + joes_dog = await Node.init(schema="TestDog", db=db, branch=branch) + await joes_dog.new(db=db, name="Joe-dog", breed="mixed", owner=joe, best_friend=jane) + await joes_dog.save(db=db) + + expected_best_friend_result_dicts = [ + { + "attr_name": "id", + "node_id": johns_dog.id, + "node_count": 3, + "attr_value": jane.id, + "relationship_identifier": "person__animal_friend", + "deepest_branch_name": default_branch.name, + }, + { + "attr_name": "id", + "node_id": jane_dog.id, + "node_count": 3, + "attr_value": jane.id, + "relationship_identifier": "person__animal_friend", + "deepest_branch_name": default_branch.name, + }, + { + "attr_name": "id", + "node_id": joes_dog.id, + "node_count": 3, + "attr_value": jane.id, + "relationship_identifier": "person__animal_friend", + "deepest_branch_name": branch.name, + }, + ] + + owner_query = await NodeUniqueAttributeConstraintQuery.init( + db=db, + branch=branch, + query_request=NodeUniquenessQueryRequest( + kind="TestDog", relationship_attribute_paths=[{"identifier": "person__animal", "value": jane.id}] + ), + ) + owner_query_result = await owner_query.execute(db=db) + assert len(owner_query_result.results) == 0 + + best_friend_query = await NodeUniqueAttributeConstraintQuery.init( + db=db, + branch=branch, + query_request=NodeUniquenessQueryRequest( + kind="TestDog", relationship_attribute_paths=[{"identifier": "person__animal_friend", "value": jane.id}] + ), + ) + best_friend_query_result = await best_friend_query.execute(db=db) + assert len(best_friend_query_result.results) == 3 + for result in best_friend_query_result.results: + serial_result = dict(result.data) + assert serial_result in expected_best_friend_result_dicts + + async def test_query_response_min_count_0_attribute_paths( db: InfrahubDatabase, car_accord_main, car_prius_main, branch: Branch, default_branch: Branch ):