Skip to content

Commit

Permalink
Merge pull request #5342 from opsmill/ajtm-12312024-skip-diff-no-changes
Browse files Browse the repository at this point in the history
skip expensive diff operations when possible
  • Loading branch information
dgarros authored Jan 9, 2025
2 parents d231ae7 + e10dc77 commit a120fb8
Show file tree
Hide file tree
Showing 35 changed files with 841 additions and 209 deletions.
3 changes: 2 additions & 1 deletion backend/infrahub/core/branch/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ async def rebase_branch(branch: str) -> None:
service=service,
)
diff_repository = await component_registry.get_component(DiffRepository, db=db, branch=obj)
enriched_diff = await diff_coordinator.update_branch_diff(base_branch=base_branch, diff_branch=obj)
enriched_diff = await diff_coordinator.update_branch_diff_and_return(base_branch=base_branch, diff_branch=obj)
if enriched_diff.get_all_conflicts():
raise ValidationError(
f"Branch {obj.name} contains conflicts with the default branch that must be addressed."
Expand Down Expand Up @@ -185,6 +185,7 @@ async def merge_branch(branch: str) -> None:
try:
await merger.merge()
except Exception as exc:
log.exception("Merge failed, beginning rollback")
await merger.rollback()
raise MergeFailedError(branch_name=branch) from exc
await merger.update_schema()
Expand Down
1 change: 1 addition & 0 deletions backend/infrahub/core/diff/combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def _combine_relationships(
combined_relationship = EnrichedDiffRelationship(
name=later_relationship.name,
label=later_relationship.label,
identifier=later_relationship.identifier,
cardinality=later_relationship.cardinality,
changed_at=later_relationship.changed_at or earlier_relationship.changed_at,
action=combined_action,
Expand Down
373 changes: 286 additions & 87 deletions backend/infrahub/core/diff/coordinator.py

Large diffs are not rendered by default.

37 changes: 33 additions & 4 deletions backend/infrahub/core/diff/data_check_synchronizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from infrahub.proposed_change.constants import ProposedChangeState

from .conflicts_extractor import DiffConflictsExtractor
from .model.path import ConflictSelection, EnrichedDiffConflict, EnrichedDiffRoot
from .model.diff import DataConflict
from .model.path import ConflictSelection, EnrichedDiffConflict, EnrichedDiffRoot, EnrichedDiffRootMetadata
from .repository.repository import DiffRepository


class DiffDataCheckSynchronizer:
Expand All @@ -18,12 +20,28 @@ def __init__(
db: InfrahubDatabase,
conflicts_extractor: DiffConflictsExtractor,
conflict_recorder: ObjectConflictValidatorRecorder,
diff_repository: DiffRepository,
):
self.db = db
self.conflicts_extractor = conflicts_extractor
self.conflict_recorder = conflict_recorder
self.diff_repository = diff_repository
self._enriched_conflicts_map: dict[str, EnrichedDiffConflict] | None = None
self._data_conflicts: list[DataConflict] | None = None

async def synchronize(self, enriched_diff: EnrichedDiffRoot) -> list[Node]:
def _get_enriched_conflicts_map(self, enriched_diff: EnrichedDiffRoot) -> dict[str, EnrichedDiffConflict]:
if self._enriched_conflicts_map is None:
self._enriched_conflicts_map = enriched_diff.get_all_conflicts()
return self._enriched_conflicts_map

async def _get_data_conflicts(self, enriched_diff: EnrichedDiffRoot) -> list[DataConflict]:
if self._data_conflicts is None:
self._data_conflicts = await self.conflicts_extractor.get_data_conflicts(enriched_diff_root=enriched_diff)
return self._data_conflicts

async def synchronize(self, enriched_diff: EnrichedDiffRoot | EnrichedDiffRootMetadata) -> list[Node]:
self._enriched_conflicts_map = None
self._data_conflicts = None
try:
proposed_changes = await NodeManager.query(
db=self.db,
Expand All @@ -35,10 +53,21 @@ async def synchronize(self, enriched_diff: EnrichedDiffRoot) -> list[Node]:
proposed_changes = []
if not proposed_changes:
return []
enriched_conflicts_map = enriched_diff.get_all_conflicts()
data_conflicts = await self.conflicts_extractor.get_data_conflicts(enriched_diff_root=enriched_diff)
all_data_checks = []
for pc in proposed_changes:
# if the enriched_diff is EnrichedDiffRootMetadata, then it has no new data in it
if not isinstance(enriched_diff, EnrichedDiffRoot):
has_validator = bool(await self.conflict_recorder.get_validator(proposed_change=pc))
# if this pc does not have a validator, then it is a new ProposedChange
if has_validator:
continue
# if this is a new ProposedChange, we need to hydrate then EnrichedDiffRoot so that we can get the conflicts from it
enriched_diff = await self.diff_repository.get_one(
diff_branch_name=enriched_diff.diff_branch_name, diff_id=enriched_diff.uuid
)

data_conflicts = await self._get_data_conflicts(enriched_diff=enriched_diff)
enriched_conflicts_map = self._get_enriched_conflicts_map(enriched_diff=enriched_diff)
core_data_checks = await self.conflict_recorder.record_conflicts(
proposed_change_id=pc.get_id(), conflicts=data_conflicts
)
Expand Down
2 changes: 2 additions & 0 deletions backend/infrahub/core/diff/enricher/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ async def _enrich_hierarchical_nodes(
parent_kind=ancestor.kind,
parent_label="",
parent_rel_name=parent_rel.name,
parent_rel_identifier=parent_rel.get_identifier(),
parent_rel_cardinality=parent_rel.cardinality,
parent_rel_label=parent_rel.label or "",
)
Expand Down Expand Up @@ -150,6 +151,7 @@ async def _enrich_nodes_with_parent(
parent_kind=peer_parent.peer_kind,
parent_label="",
parent_rel_name=parent_rel.name,
parent_rel_identifier=parent_rel.get_identifier(),
parent_rel_cardinality=parent_rel.cardinality,
parent_rel_label=parent_rel.label or "",
)
Expand Down
2 changes: 1 addition & 1 deletion backend/infrahub/core/diff/merger/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(
self.serializer = serializer

async def merge_graph(self, at: Timestamp) -> None:
enriched_diffs = await self.diff_repository.get_empty_roots(
enriched_diffs = await self.diff_repository.get_roots_metadata(
diff_branch_names=[self.source_branch.name], base_branch_names=[self.destination_branch.name]
)
latest_diff = None
Expand Down
88 changes: 85 additions & 3 deletions backend/infrahub/core/diff/model/path.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

from dataclasses import dataclass, field, replace
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import TYPE_CHECKING, Any, Optional
from uuid import uuid4

from infrahub.core.constants import (
BranchSupportType,
Expand Down Expand Up @@ -239,6 +240,7 @@ def from_calculated_element(cls, calculated_element: DiffSingleRelationship) ->
@dataclass
class EnrichedDiffRelationship(BaseSummary):
name: str
identifier: str
label: str
cardinality: RelationshipCardinality
path_identifier: str = field(default="", kw_only=True)
Expand Down Expand Up @@ -270,6 +272,7 @@ def include_in_response(self) -> bool:
def from_calculated_relationship(cls, calculated_relationship: DiffRelationship) -> EnrichedDiffRelationship:
return EnrichedDiffRelationship(
name=calculated_relationship.name,
identifier=calculated_relationship.identifier,
label="",
cardinality=calculated_relationship.cardinality,
changed_at=calculated_relationship.changed_at,
Expand Down Expand Up @@ -403,14 +406,43 @@ def from_calculated_node(cls, calculated_node: DiffNode) -> EnrichedDiffNode:


@dataclass
class EnrichedDiffRoot(BaseSummary):
class EnrichedDiffRootMetadata(BaseSummary):
base_branch_name: str
diff_branch_name: str
from_time: Timestamp
to_time: Timestamp
uuid: str
partner_uuid: str
tracking_id: TrackingId | None = field(default=None, kw_only=True)

def __hash__(self) -> int:
return hash(self.uuid)

@property
def time_range(self) -> Interval:
return self.to_time.obj - self.from_time.obj

def update_metadata(
self,
from_time: Timestamp | None = None,
to_time: Timestamp | None = None,
tracking_id: TrackingId | None = None,
) -> bool:
is_changed = False
if from_time and self.from_time != from_time:
self.from_time = from_time
is_changed = True
if to_time and self.to_time != to_time:
self.to_time = to_time
is_changed = True
if self.tracking_id != tracking_id:
self.tracking_id = tracking_id
is_changed = True
return is_changed


@dataclass
class EnrichedDiffRoot(EnrichedDiffRootMetadata):
nodes: set[EnrichedDiffNode] = field(default_factory=set)

def __hash__(self) -> int:
Expand Down Expand Up @@ -446,6 +478,10 @@ def get_all_conflicts(self) -> dict[str, EnrichedDiffConflict]:
all_conflicts.update(node.get_all_conflicts())
return all_conflicts

@classmethod
def from_root_metadata(cls, empty_root: EnrichedDiffRootMetadata) -> EnrichedDiffRoot:
return EnrichedDiffRoot(**asdict(empty_root))

@classmethod
def from_calculated_diff(
cls, calculated_diff: DiffRoot, base_branch_name: str, partner_uuid: str
Expand All @@ -467,6 +503,7 @@ def add_parent(
parent_kind: str,
parent_label: str,
parent_rel_name: str,
parent_rel_identifier: str,
parent_rel_cardinality: RelationshipCardinality,
parent_rel_label: str = "",
) -> EnrichedDiffNode:
Expand All @@ -491,6 +528,7 @@ def add_parent(
node.relationships.add(
EnrichedDiffRelationship(
name=parent_rel_name,
identifier=parent_rel_identifier,
label=parent_rel_label,
cardinality=parent_rel_cardinality,
changed_at=None,
Expand All @@ -503,9 +541,48 @@ def add_parent(


@dataclass
class EnrichedDiffs:
class EnrichedDiffsMetadata:
base_branch_name: str
diff_branch_name: str
base_branch_diff: EnrichedDiffRootMetadata
diff_branch_diff: EnrichedDiffRootMetadata

def __repr__(self) -> str:
return (
f"{self.__class__.__name__}("
f"branch_uuid={self.diff_branch_diff.uuid},"
f"base_uuid={self.base_branch_diff.uuid},"
f"branch_name={self.diff_branch_name},"
f"base_name={self.base_branch_name},"
f"from_time={self.diff_branch_diff.from_time},"
f"to_time={self.diff_branch_diff.to_time})"
)

def update_metadata(
self,
from_time: Timestamp | None = None,
to_time: Timestamp | None = None,
tracking_id: TrackingId | None = None,
) -> bool:
is_changed = self.base_branch_diff.update_metadata(
from_time=from_time, to_time=to_time, tracking_id=tracking_id
)
is_changed |= self.diff_branch_diff.update_metadata(
from_time=from_time, to_time=to_time, tracking_id=tracking_id
)
return is_changed

def set_fresh_uuids(self) -> None:
base_uuid = str(uuid4())
branch_uuid = str(uuid4())
self.base_branch_diff.uuid = base_uuid
self.base_branch_diff.partner_uuid = branch_uuid
self.diff_branch_diff.uuid = branch_uuid
self.diff_branch_diff.partner_uuid = base_uuid


@dataclass
class EnrichedDiffs(EnrichedDiffsMetadata):
base_branch_diff: EnrichedDiffRoot
diff_branch_diff: EnrichedDiffRoot

Expand Down Expand Up @@ -539,6 +616,10 @@ def from_calculated_diffs(cls, calculated_diffs: CalculatedDiffs) -> EnrichedDif
diff_branch_diff=diff_branch_diff,
)

@property
def is_empty(self) -> bool:
return len(self.base_branch_diff.nodes) == 0 and len(self.diff_branch_diff.nodes) == 0


@dataclass
class CalculatedDiffs:
Expand Down Expand Up @@ -577,6 +658,7 @@ class DiffSingleRelationship:
@dataclass
class DiffRelationship:
name: str
identifier: str
cardinality: RelationshipCardinality
changed_at: Timestamp
action: DiffAction
Expand Down
33 changes: 0 additions & 33 deletions backend/infrahub/core/diff/query/empty_roots.py

This file was deleted.

35 changes: 35 additions & 0 deletions backend/infrahub/core/diff/query/field_specifiers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from typing import Any, Generator

from infrahub.core.query import Query, QueryType
from infrahub.database import InfrahubDatabase


class EnrichedDiffFieldSpecifiersQuery(Query):
name = "enriched_diff_field_specifiers"
type = QueryType.READ

def __init__(self, diff_id: str, **kwargs: Any) -> None:
super().__init__(**kwargs)
self.diff_id = diff_id

async def query_init(self, db: InfrahubDatabase, **kwargs: Any) -> None:
self.params["diff_id"] = self.diff_id
query = """
CALL {
MATCH (root:DiffRoot {uuid: $diff_id})-[:DIFF_HAS_NODE]->(node:DiffNode)-[:DIFF_HAS_ATTRIBUTE]->(attr:DiffAttribute)
RETURN node.uuid AS node_uuid, attr.name AS field_name
UNION
MATCH (root:DiffRoot {uuid: $diff_id})-[:DIFF_HAS_NODE]->(node:DiffNode)-[:DIFF_HAS_RELATIONSHIP]->(rel:DiffRelationship)
RETURN node.uuid AS node_uuid, rel.identifier AS field_name
}
"""
self.add_to_query(query=query)
self.return_labels = ["node_uuid", "field_name"]
self.order_by = ["node_uuid", "field_name"]

def get_node_field_specifier_tuples(self) -> Generator[tuple[str, str], None, None]:
for result in self.get_results():
node_uuid = result.get_as_str("node_uuid")
field_name = result.get_as_str("field_name")
if node_uuid and field_name:
yield (node_uuid, field_name)
Loading

0 comments on commit a120fb8

Please sign in to comment.