Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add migration to fix database edges state #5640

Merged
merged 6 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/infrahub/core/graph/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
GRAPH_VERSION = 18
GRAPH_VERSION = 19
2 changes: 2 additions & 0 deletions backend/infrahub/core/migrations/graph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .m016_diff_delete_bug_fix import Migration016
from .m017_add_core_profile import Migration017
from .m018_uniqueness_nulls import Migration018
from .m019_restore_rels_to_time import Migration019

if TYPE_CHECKING:
from infrahub.core.root import Root
Expand All @@ -45,6 +46,7 @@
Migration016,
Migration017,
Migration018,
Migration019,
]


Expand Down
256 changes: 256 additions & 0 deletions backend/infrahub/core/migrations/graph/m019_restore_rels_to_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Sequence

from infrahub.core.migrations.shared import GraphMigration, MigrationResult
from infrahub.log import get_logger

from ...constants import GLOBAL_BRANCH_NAME, BranchSupportType
from ...query import Query, QueryType

if TYPE_CHECKING:
from infrahub.database import InfrahubDatabase

log = get_logger()


class FixBranchAwareEdgesQuery(Query):
name = "replace_global_edges"
type = QueryType.WRITE
insert_return = False

async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:
"""
Between a Node and a Relationship, if Relationship.branch_support=aware, replace any global edge
to the branch of a non-global edge leaving out of the Relationship node. Note that there can't
be multiple non-global branches on these edges, as a dedicated Relationship node would exist for that.
"""

query = """
MATCH (node:Node)-[global_edge:IS_RELATED {branch: $global_branch}]-(rel:Relationship)
WHERE rel.branch_support=$branch_aware
MATCH (rel)-[non_global_edge:IS_RELATED]-(node_2: Node)
WHERE non_global_edge.branch <> $global_branch
SET global_edge.branch = non_global_edge.branch
"""

params = {
"global_branch": GLOBAL_BRANCH_NAME,
"branch_aware": BranchSupportType.AWARE.value,
"branch_agnostic": BranchSupportType.AGNOSTIC.value,
}

self.params.update(params)
self.add_to_query(query)


class SetMissingToTimeQuery(Query):
name = "set_missing_to_time"
type = QueryType.WRITE
insert_return = False

async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:
"""
If both a deleted edge and an active edge with no time exist between 2 nodes on the same branch,
set `to` time of active edge using `from` time of the deleted one. This would typically happen after having
replaced a deleted edge on global branch by correct branch with above query.
"""

query = """
MATCH (node:Node)-[deleted_edge:IS_RELATED {status: "deleted"}]-(rel:Relationship)
MATCH (rel)-[active_edge:IS_RELATED {status: "active"}]-(node)
WHERE active_edge.to IS NULL AND deleted_edge.branch = active_edge.branch
SET active_edge.to = deleted_edge.from
"""

self.add_to_query(query)


class DeleteNodesRelsQuery(Query):
name = "delete_relationships_of_deleted_nodes"
type = QueryType.WRITE
insert_return = False

async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:
"""
Some nodes may have been incorrectly deleted, typically, while these nodes edges connected to Root
are correctly deleted, edges connected to other `Node` through a `Relationship` node may still be active.
Following query correctly deletes these edges by both setting correct to time and creating corresponding deleted edge.
"""

query = """
MATCH (deleted_node: Node)-[deleted_edge:IS_PART_OF {status: "deleted"}]->(:Root)
MATCH (deleted_node)-[:IS_RELATED]-(rel:Relationship)

// exclude nodes having been deleted through migration. find those with same uuid and exclude the one with earlier
// timestamp on active branch
WHERE NOT EXISTS {
MATCH (deleted_node)-[e1:IS_RELATED]-(rel)-[e2:IS_RELATED]-(other_node)
WITH deleted_node, other_node, MIN(e1.from) AS min_e1_from, MIN(e2.from) AS min_e2_from
WHERE deleted_node <> other_node AND deleted_node.uuid = other_node.uuid AND min_e1_from < min_e2_from
}

// Note that if an AWARE node has been deleted on a branch and relationship is AGNOSTIC, we do not "delete" this relationship
// right now as this aware node might exist on another branch.

// Set to time if there is an active edge:
// - on deleted edge branch
// - or on any branch and deleted node is agnostic
// - or deleted node is aware and rel is agnostic
CALL {
WITH rel, deleted_edge
OPTIONAL MATCH (rel)-[peer_active_edge {status: "active"}]-(peer_1)
WHERE (peer_active_edge.branch = deleted_edge.branch OR (rel.branch_support <> $branch_agnostic AND deleted_edge.branch = $global_branch))
AND peer_active_edge.to IS NULL
SET peer_active_edge.to = deleted_edge.from
}

// Get distinct rel nodes linked to a deleted node, with the time at which we should delete rel edges.
// Take the MAX time so if it does not take the deleted time of a node deleted through a duplication migration.
WITH DISTINCT rel,
deleted_edge.branch AS deleted_edge_branch,
deleted_edge.branch_level AS branch_level,
MAX(deleted_edge.from) as deleted_time,
deleted_node.branch_support as deleted_node_branch_support


// No need to check deleted edge branch because
// If deleted_node has different branch support type (agnostic/aware) than rel type,
// there might already be a deleted edge that we would not match if we filter on deleted_edge_branch.
// If both are aware, it still works, as we would have one Relationship node for each branch on which this relationship exists.
MATCH (rel)-[]-(peer_2)
WHERE NOT exists((rel)-[{status: "deleted"}]-(peer_2))


// If res is agnostic and delete node is agnostic, we should delete on global branch
// If rel is aware and deleted node is aware, we should use deleted edge branch
// If rel is aware and delete node is agnostic, we need to create deleted edges for every distinct branch on which this relationship exists.
WITH DISTINCT
CASE
// Branch on which `deleted` edge should be created depends on rel.branch_support.
WHEN rel.branch_support = $branch_agnostic
THEN CASE
WHEN deleted_node_branch_support = $branch_agnostic THEN [$global_branch]
ELSE []
END
ELSE
CASE
WHEN deleted_node_branch_support = $branch_agnostic
THEN COLLECT {
WITH rel
MATCH (rel)-[active_edge {status: "active"}]-(peer_2)
RETURN DISTINCT active_edge.branch
}
ELSE
CASE
// if no active edge on this branch exists it means this relationship node is dedicated for another branch
WHEN exists((rel)-[{status: "active", branch: deleted_edge_branch}]-(peer_2)) THEN [deleted_edge_branch]
ELSE []
END
END
END AS branches,
branch_level,
deleted_time,
peer_2,
rel

UNWIND branches as branch

// Then creates `deleted` edge.
// Below CALL subqueries are called once for each rel-peer_2 pair for which we want to create a deleted edge.
// Note that with current infrahub relationships edges design, only one of this CALL should be matched per pair.

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)-[:IS_RELATED]->(peer_2)
MERGE (rel)-[:IS_RELATED {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]->(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)-[:IS_PROTECTED]->(peer_2)
MERGE (rel)-[:IS_PROTECTED {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]->(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)-[:IS_VISIBLE]->(peer_2)
MERGE (rel)-[:IS_VISIBLE {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]->(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)-[:HAS_OWNER]->(peer_2)
MERGE (rel)-[:HAS_OWNER {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]->(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)-[:HAS_SOURCE]->(peer_2)
MERGE (rel)-[:HAS_SOURCE {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]->(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)<-[:IS_RELATED]-(peer_2)
MERGE (rel)<-[:IS_RELATED {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]-(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)<-[:IS_PROTECTED]-(peer_2)
MERGE (rel)<-[:IS_PROTECTED {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]-(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)<-[:IS_VISIBLE]-(peer_2)
MERGE (rel)<-[:IS_VISIBLE {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]-(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)<-[:HAS_OWNER]-(peer_2)
MERGE (rel)<-[:HAS_OWNER {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]-(peer_2)
}

CALL {
WITH rel, peer_2, branch, branch_level, deleted_time
MATCH (rel)<-[:HAS_SOURCE]-(peer_2)
MERGE (rel)<-[:HAS_SOURCE {status: "deleted", branch: branch, branch_level: branch_level, from: deleted_time}]-(peer_2)
}
"""

params = {
"global_branch": GLOBAL_BRANCH_NAME,
"branch_aware": BranchSupportType.AWARE.value,
"branch_agnostic": BranchSupportType.AGNOSTIC.value,
}

self.params.update(params)
self.add_to_query(query)


class Migration019(GraphMigration):
"""
Fix corrupted state introduced by Migration012 when duplicating a CoreAccount (branch Aware)
being part of a CoreStandardGroup (branch Agnostic). Database is corrupted at multiple points:
- Old CoreAccount node <> group_member node `active` edge has no `to` time (possibly because of #5590).
- Old CoreAccount node <> group_member node `deleted` edge is on `$global_branch` branch instead of `main`.
- New CoreAccount node <> group_member node `active` edge is on `$global_branch` branch instead of `main`.

Also, users having deleted corresponding CoreStandardGroup will also have the following data corruption,
as deletion did not happen correctly due to above issues:
- Both CoreAccount <> group_member and CoreStandardGroup <> group_member edges
have not been deleted (ie status is `active` without `to` time and no additional `deleted` edge).

This migration fixes all above issues to have consistent edges, and fixes IFC-1204.
"""

name: str = "019_fix_edges_state"
minimum_version: int = 18
queries: Sequence[type[Query]] = [FixBranchAwareEdgesQuery, SetMissingToTimeQuery, DeleteNodesRelsQuery]

async def validate_migration(self, db: InfrahubDatabase) -> MigrationResult:
result = MigrationResult()
return result
20 changes: 8 additions & 12 deletions backend/infrahub/core/relationship/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,8 @@ def get_branch_based_on_support_type(self) -> Branch:
"""If the attribute is branch aware, return the Branch object associated with this attribute
If the attribute is branch agnostic return the Global Branch
Note that if this relationship is Aware and source node is Agnostic, it will return -global- branch.
Returns:
Branch:
"""
Expand Down Expand Up @@ -959,7 +961,7 @@ async def _fetch_relationships(
self.has_fetched_relationships = True

for peer_id in details.peer_ids_present_local_only:
await self.remove(peer_id=peer_id, db=db)
await self.remove_locally(peer_id=peer_id, db=db)

async def get(self, db: InfrahubDatabase) -> Relationship | list[Relationship] | None:
rels = await self.get_relationships(db=db)
Expand Down Expand Up @@ -1077,22 +1079,17 @@ async def resolve(self, db: InfrahubDatabase) -> None:
for rel in self._relationships:
await rel.resolve(db=db)

async def remove(
async def remove_locally(
self,
peer_id: Union[str, UUID],
db: InfrahubDatabase,
update_db: bool = False,
) -> bool:
"""Remove a peer id from the local relationships list,
need to investigate if and when we should update the relationship in the database."""
"""Remove a peer id from the local relationships list"""

for idx, rel in enumerate(await self.get_relationships(db=db)):
if str(rel.peer_id) != str(peer_id):
continue

if update_db:
await rel.delete(db=db)

self._relationships.pop(idx)
return True

Expand All @@ -1109,14 +1106,13 @@ async def remove_in_db(

# - Update the existing relationship if we are on the same branch
rel_ids_per_branch = peer_data.rel_ids_per_branch()

# In which cases do we end up here and do not want to set `to` time?
if branch.name in rel_ids_per_branch:
await update_relationships_to([str(ri) for ri in rel_ids_per_branch[branch.name]], to=remove_at, db=db)

# - Create a new rel of type DELETED if the existing relationship is on a different branch
rel_branches: set[str] = set()
if peer_data.rels:
rel_branches = {r.branch for r in peer_data.rels}
if rel_branches == {peer_data.branch}:
if peer_data.rels and {r.branch for r in peer_data.rels} == {peer_data.branch}:
return

query = await RelationshipDataDeleteQuery.init(
Expand Down
1 change: 1 addition & 0 deletions backend/infrahub/core/schema/definitions/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@
"optional": True,
"identifier": "group_member",
"cardinality": "many",
"branch": BranchSupportType.AWARE,
},
{
"name": "subscribers",
Expand Down
14 changes: 12 additions & 2 deletions backend/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,14 +969,24 @@ def car_person_branch_agnostic_schema() -> dict[str, Any]:
],
"relationships": [
{
"name": "owner",
"name": "agnostic_owner",
"label": "Commander of Car",
"peer": "TestPerson",
"optional": False,
"kind": "Parent",
"cardinality": "one",
"direction": "outbound",
"branch": BranchSupportType.AGNOSTIC.value,
"identifier": "agnostic_owner",
},
{
"name": "aware_owner",
"label": "Commander of Car",
"peer": "TestPerson",
"optional": True,
"cardinality": "one",
"direction": "outbound",
"branch": BranchSupportType.AWARE.value,
"identifier": "aware_owner",
},
],
},
Expand Down
Loading
Loading