Skip to content

Commit

Permalink
feat: add support for disabling alert rules forwarding (#252)
Browse files Browse the repository at this point in the history
* feat: add support for disabling alert rules forwarding

* chore: fetch new library

* fix things

* tox fmt

* changes

* fixed somewhat a unittest

* fix scenario test

* add loki alert silencing

* fix things

* improve scenario test

* fix in loki lib

* rephrase assert in scenario test

* update prometheus scrape lib

* fmt and new lib

* move wokeignore to repo root

* remove accidentally committed files

* pr comments

* fetch-lib prometheus libraries

---------

Co-authored-by: Pietro Pasotti <pietro.pasotti@canonical.com>
  • Loading branch information
lucabello and PietroPasotti authored Feb 10, 2025
1 parent c53ff17 commit 6ef4f2c
Show file tree
Hide file tree
Showing 11 changed files with 280 additions and 26 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,8 @@ crash.log
.terraform/
terraform.tfvars
terraform.tfvars.json

# Created by tox -e unit
/grafana_dashboards/
/loki_alert_rules/
/prometheus_alert_rules/
2 changes: 2 additions & 0 deletions .wokeignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lib/charms/certificate_transfer_interface
src/prometheus_alert_rules/network.rules
5 changes: 5 additions & 0 deletions charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,8 @@ config:
Ref (__path_exclude__): https://grafana.com/docs/loki/latest/send-data/promtail/scraping/
type: string
default: ""
forward_alert_rules:
description: >
Toggle forwarding of alert rules.
type: boolean
default: true
1 change: 0 additions & 1 deletion lib/.wokeignore

This file was deleted.

45 changes: 40 additions & 5 deletions lib/charms/loki_k8s/v1/loki_push_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def __init__(self, ...):
RelationRole,
WorkloadEvent,
)
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.framework import BoundEvent, EventBase, EventSource, Object, ObjectEvents
from ops.jujuversion import JujuVersion
from ops.model import Container, ModelError, Relation
from ops.pebble import APIError, ChangeError, Layer, PathError, ProtocolError
Expand All @@ -546,7 +546,7 @@ def __init__(self, ...):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 13
LIBPATCH = 15

PYDEPS = ["cosl"]

Expand Down Expand Up @@ -1543,10 +1543,13 @@ def __init__(
alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
recursive: bool = False,
skip_alert_topology_labeling: bool = False,
*,
forward_alert_rules: bool = True,
):
super().__init__(charm, relation_name)
self._charm = charm
self._relation_name = relation_name
self._forward_alert_rules = forward_alert_rules
self.topology = JujuTopology.from_charm(charm)

try:
Expand All @@ -1569,7 +1572,8 @@ def _handle_alert_rules(self, relation):
alert_rules = (
AlertRules(None) if self._skip_alert_topology_labeling else AlertRules(self.topology)
)
alert_rules.add_path(self._alert_rules_path, recursive=self._recursive)
if self._forward_alert_rules:
alert_rules.add_path(self._alert_rules_path, recursive=self._recursive)
alert_rules_as_dict = alert_rules.as_dict()

relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict())
Expand Down Expand Up @@ -1617,6 +1621,9 @@ def __init__(
alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
recursive: bool = True,
skip_alert_topology_labeling: bool = False,
*,
refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
forward_alert_rules: bool = True,
):
"""Construct a Loki charm client.
Expand All @@ -1642,6 +1649,9 @@ def __init__(
alert_rules_path: a string indicating a path where alert rules can be found
recursive: Whether to scan for rule files recursively.
skip_alert_topology_labeling: whether to skip the alert topology labeling.
forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules.
refresh_event: an optional bound event or list of bound events which
will be observed to re-set scrape job data (IP address and others)
Raises:
RelationNotFoundError: If there is no relation in the charm's metadata.yaml
Expand All @@ -1667,14 +1677,26 @@ def __init__(
charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires
)
super().__init__(
charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling
charm,
relation_name,
alert_rules_path,
recursive,
skip_alert_topology_labeling,
forward_alert_rules=forward_alert_rules,
)
events = self._charm.on[relation_name]
self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event)
self.framework.observe(self._charm.on.config_changed, self._on_lifecycle_event)
self.framework.observe(events.relation_joined, self._on_logging_relation_joined)
self.framework.observe(events.relation_changed, self._on_logging_relation_changed)
self.framework.observe(events.relation_departed, self._on_logging_relation_departed)

if refresh_event:
if not isinstance(refresh_event, list):
refresh_event = [refresh_event]
for ev in refresh_event:
self.framework.observe(ev, self._on_lifecycle_event)

def _on_lifecycle_event(self, _: HookEvent):
"""Update require relation data on charm upgrades and other lifecycle events.
Expand Down Expand Up @@ -2550,10 +2572,17 @@ def __init__(
alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
recursive: bool = True,
skip_alert_topology_labeling: bool = False,
refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
forward_alert_rules: bool = True,
):
_PebbleLogClient.check_juju_version()
super().__init__(
charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling
charm,
relation_name,
alert_rules_path,
recursive,
skip_alert_topology_labeling,
forward_alert_rules=forward_alert_rules,
)
self._charm = charm
self._relation_name = relation_name
Expand All @@ -2564,6 +2593,12 @@ def __init__(
self.framework.observe(on.relation_departed, self._update_logging)
self.framework.observe(on.relation_broken, self._update_logging)

if refresh_event:
if not isinstance(refresh_event, list):
refresh_event = [refresh_event]
for ev in refresh_event:
self.framework.observe(ev, self._update_logging)

for container_name in self._charm.meta.containers.keys():
snake_case_container_name = container_name.replace("-", "_")
self.framework.observe(
Expand Down
59 changes: 48 additions & 11 deletions lib/charms/prometheus_k8s/v0/prometheus_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ def _on_scrape_targets_changed(self, event):

import yaml
from cosl import JujuTopology
from cosl.rules import AlertRules
from ops.charm import CharmBase, RelationRole
from cosl.rules import AlertRules, generic_alert_groups
from ops.charm import CharmBase, RelationJoinedEvent, RelationRole
from ops.framework import (
BoundEvent,
EventBase,
Expand All @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 48
LIBPATCH = 50

PYDEPS = ["cosl"]

Expand Down Expand Up @@ -1309,6 +1309,8 @@ def __init__(
refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
external_url: str = "",
lookaside_jobs_callable: Optional[Callable] = None,
*,
forward_alert_rules: bool = True,
):
"""Construct a metrics provider for a Prometheus charm.
Expand Down Expand Up @@ -1411,6 +1413,7 @@ def __init__(
files. Defaults to "./prometheus_alert_rules",
resolved relative to the directory hosting the charm entry file.
The alert rules are automatically updated on charm upgrade.
forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules.
refresh_event: an optional bound event or list of bound events which
will be observed to re-set scrape job data (IP address and others)
external_url: an optional argument that represents an external url that
Expand Down Expand Up @@ -1449,6 +1452,7 @@ def __init__(

self._charm = charm
self._alert_rules_path = alert_rules_path
self._forward_alert_rules = forward_alert_rules
self._relation_name = relation_name
# sanitize job configurations to the supported subset of parameters
jobs = [] if jobs is None else jobs
Expand Down Expand Up @@ -1530,7 +1534,11 @@ def set_scrape_job_spec(self, _=None):
return

alert_rules = AlertRules(query_type="promql", topology=self.topology)
alert_rules.add_path(self._alert_rules_path, recursive=True)
if self._forward_alert_rules:
alert_rules.add_path(self._alert_rules_path, recursive=True)
alert_rules.add(
generic_alert_groups.application_rules, group_name_prefix=self.topology.identifier
)
alert_rules_as_dict = alert_rules.as_dict()

for relation in self._charm.model.relations[self._relation_name]:
Expand Down Expand Up @@ -1776,6 +1784,9 @@ def __init__(
relation_names: Optional[dict] = None,
relabel_instance=True,
resolve_addresses=False,
path_to_own_alert_rules: Optional[str] = None,
*,
forward_alert_rules: bool = True,
):
"""Construct a `MetricsEndpointAggregator`.
Expand All @@ -1795,6 +1806,8 @@ def __init__(
resolve_addresses: A boolean flag indicating if the aggregator
should attempt to perform DNS lookups of targets and append
a `dns_name` label
path_to_own_alert_rules: Optionally supply a path for alert rule files
forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules
"""
self._charm = charm

Expand All @@ -1807,15 +1820,21 @@ def __init__(
self._alert_rules_relation = relation_names.get("alert_rules", "prometheus-rules")

super().__init__(charm, self._prometheus_relation)
self.topology = JujuTopology.from_charm(charm)

self._stored.set_default(jobs=[], alert_rules=[])

self._relabel_instance = relabel_instance
self._resolve_addresses = resolve_addresses

self._forward_alert_rules = forward_alert_rules

# manage Prometheus charm relation events
prometheus_events = self._charm.on[self._prometheus_relation]
self.framework.observe(prometheus_events.relation_joined, self._set_prometheus_data)

self.path_to_own_alert_rules = path_to_own_alert_rules

# manage list of Prometheus scrape jobs from related scrape targets
target_events = self._charm.on[self._target_relation]
self.framework.observe(target_events.relation_changed, self._on_prometheus_targets_changed)
Expand All @@ -1828,7 +1847,7 @@ def __init__(
self.framework.observe(alert_rule_events.relation_changed, self._on_alert_rules_changed)
self.framework.observe(alert_rule_events.relation_departed, self._on_alert_rules_departed)

def _set_prometheus_data(self, event):
def _set_prometheus_data(self, event: Optional[RelationJoinedEvent] = None):
"""Ensure every new Prometheus instances is updated.
Any time a new Prometheus unit joins the relation with
Expand All @@ -1838,6 +1857,7 @@ def _set_prometheus_data(self, event):
if not self._charm.unit.is_leader():
return

# Gather the scrape jobs
jobs = [] + _type_convert_stored(
self._stored.jobs # pyright: ignore
) # list of scrape jobs, one per relation
Expand All @@ -1846,6 +1866,7 @@ def _set_prometheus_data(self, event):
if targets and relation.app:
jobs.append(self._static_scrape_job(targets, relation.app.name))

# Gather the alert rules
groups = [] + _type_convert_stored(
self._stored.alert_rules # pyright: ignore
) # list of alert rule groups
Expand All @@ -1856,9 +1877,23 @@ def _set_prometheus_data(self, event):
rules = self._label_alert_rules(unit_rules, appname)
group = {"name": self.group_name(appname), "rules": rules}
groups.append(group)

event.relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs)
event.relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups})
alert_rules = AlertRules(query_type="promql", topology=self.topology)
# Add alert rules from file
if self.path_to_own_alert_rules:
alert_rules.add_path(self.path_to_own_alert_rules, recursive=True)
# Add generic alert rules
alert_rules.add(
generic_alert_groups.application_rules, group_name_prefix=self.topology.identifier
)
groups.extend(alert_rules.as_dict()["groups"])

# Set scrape jobs and alert rules in relation data
relations = [event.relation] if event else self.model.relations[self._prometheus_relation]
for rel in relations:
rel.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) # type: ignore
rel.data[self._charm.app]["alert_rules"] = json.dumps( # type: ignore
{"groups": groups if self._forward_alert_rules else []}
)

def _on_prometheus_targets_changed(self, event):
"""Update scrape jobs in response to scrape target changes.
Expand Down Expand Up @@ -2129,7 +2164,9 @@ def set_alert_rule_data(self, name: str, unit_rules: dict, label_rules: bool = T

if updated_group["name"] not in [g["name"] for g in groups]:
groups.append(updated_group)
relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups})
relation.data[self._charm.app]["alert_rules"] = json.dumps(
{"groups": groups if self._forward_alert_rules else []}
)

if not _type_convert_stored(self._stored.alert_rules) == groups: # pyright: ignore
self._stored.alert_rules = groups
Expand Down Expand Up @@ -2177,8 +2214,8 @@ def remove_alert_rules(self, group_name: str, unit_name: str) -> None:
changed_group["rules"] = rules_kept # type: ignore
groups.append(changed_group)

relation.data[self._charm.app]["alert_rules"] = (
json.dumps({"groups": groups}) if groups else "{}"
relation.data[self._charm.app]["alert_rules"] = json.dumps(
{"groups": groups if self._forward_alert_rules else []}
)

if not _type_convert_stored(self._stored.alert_rules) == groups: # pyright: ignore
Expand Down
Loading

0 comments on commit 6ef4f2c

Please sign in to comment.