Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update cos_agent lib with generic HostHealth rules #232

Merged
merged 7 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions lib/charms/grafana_agent/v0/cos_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def __init__(self, *args):

import pydantic
from cosl import DashboardPath40UID, JujuTopology, LZMABase64
from cosl.rules import AlertRules
from cosl.rules import AlertRules, generic_alert_groups
from ops.charm import RelationChangedEvent
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.model import ModelError, Relation
Expand All @@ -254,7 +254,7 @@ class _MetricsEndpointDict(TypedDict):

LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
LIBAPI = 0
LIBPATCH = 17
LIBPATCH = 18

PYDEPS = ["cosl >= 0.0.50", "pydantic"]

Expand All @@ -268,7 +268,6 @@ class _MetricsEndpointDict(TypedDict):
logger = logging.getLogger(__name__)
SnapEndpoint = namedtuple("SnapEndpoint", "owner, name")


# Note: MutableMapping is imported from the typing module and not from collections.abc
# because subscripting collections.abc.MutableMapping was only added in Python 3.9, while
# most of our charms are based on Ubuntu 20.04, which ships Python 3.8.
Expand Down Expand Up @@ -732,6 +731,10 @@ def _metrics_alert_rules(self) -> Dict:
query_type="promql", topology=JujuTopology.from_charm(self._charm)
)
alert_rules.add_path(self._metrics_rules, recursive=self._recursive)
alert_rules.add(
generic_alert_groups.application_rules,
group_name_prefix=JujuTopology.from_charm(self._charm).identifier,
)
return alert_rules.as_dict()

@property
Expand Down
25 changes: 0 additions & 25 deletions src/prometheus_alert_rules/host_health.rules

This file was deleted.

30 changes: 29 additions & 1 deletion tests/scenario/test_cos_agent_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
COSAgentProvider,
COSAgentRequirer,
)
from cosl.rules import generic_alert_groups
from ops.charm import CharmBase
from ops.framework import Framework
from ops.testing import Context, PeerRelation, State, SubordinateRelation
Expand Down Expand Up @@ -119,6 +120,25 @@ def requirer_ctx(requirer_charm):
return Context(charm_type=requirer_charm, meta=requirer_charm.META)


def test_cos_agent_injects_generic_alerts(provider_ctx):
    """Verify that a cos-agent relation-changed event publishes the generic alert groups."""
    # GIVEN a cos-agent subordinate relation
    relation = SubordinateRelation("cos-agent")

    # WHEN relation-changed is emitted for remote unit 1
    event = provider_ctx.on.relation_changed(relation=relation, remote_unit=1)
    state_in = State(relations=[relation])
    state_out = provider_ctx.run(event, state_in)

    # Decode the JSON payload the provider stored in its local unit data
    local_data = state_out.get_relation(relation.id).local_unit_data
    published = json.loads(local_data[CosAgentPeersUnitData.KEY])

    # THEN the metrics_alert_rules groups should only contain the generic alert groups
    expected_groups = generic_alert_groups.application_rules["groups"]
    assert published["metrics_alert_rules"]["groups"] == expected_groups


def test_cos_agent_changed_no_remote_data(provider_ctx):
cos_agent = SubordinateRelation("cos-agent")

Expand All @@ -130,7 +150,15 @@ def test_cos_agent_changed_no_remote_data(provider_ctx):
config = json.loads(
state_out.get_relation(cos_agent.id).local_unit_data[CosAgentPeersUnitData.KEY]
)
assert config["metrics_alert_rules"] == {}

# The cos_agent lib injects generic (HostHealth) alert rules; filter them out before asserting.
config["metrics_alert_rules"]["groups"] = [
group
for group in config["metrics_alert_rules"]["groups"]
if "_HostHealth_" not in group["name"]
]

assert config["metrics_alert_rules"] == {"groups": []}
assert config["log_alert_rules"] == {}
assert len(config["dashboards"]) == 1
assert len(config["metrics_scrape_jobs"]) == 1
Expand Down
Loading