Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for disabling alert rules forwarding #252

Merged
merged 19 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,8 @@ crash.log
.terraform/
terraform.tfvars
terraform.tfvars.json

# Created by tox -e unit
/grafana_dashboards/
/loki_alert_rules/
/prometheus_alert_rules/
2 changes: 2 additions & 0 deletions .wokeignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lib/charms/certificate_transfer_interface
src/prometheus_alert_rules/network.rules
5 changes: 5 additions & 0 deletions charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,8 @@ config:
Ref (__path_exclude__): https://grafana.com/docs/loki/latest/send-data/promtail/scraping/
type: string
default: ""
forward_alert_rules:
description: >
Toggle forwarding of alert rules.
type: boolean
default: true
1 change: 0 additions & 1 deletion lib/.wokeignore

This file was deleted.

45 changes: 40 additions & 5 deletions lib/charms/loki_k8s/v1/loki_push_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def __init__(self, ...):
RelationRole,
WorkloadEvent,
)
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.framework import BoundEvent, EventBase, EventSource, Object, ObjectEvents
from ops.jujuversion import JujuVersion
from ops.model import Container, ModelError, Relation
from ops.pebble import APIError, ChangeError, Layer, PathError, ProtocolError
Expand All @@ -546,7 +546,7 @@ def __init__(self, ...):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 13
LIBPATCH = 15

PYDEPS = ["cosl"]

Expand Down Expand Up @@ -1543,10 +1543,13 @@ def __init__(
alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
recursive: bool = False,
skip_alert_topology_labeling: bool = False,
*,
forward_alert_rules: bool = True,
):
super().__init__(charm, relation_name)
self._charm = charm
self._relation_name = relation_name
self._forward_alert_rules = forward_alert_rules
self.topology = JujuTopology.from_charm(charm)

try:
Expand All @@ -1569,7 +1572,8 @@ def _handle_alert_rules(self, relation):
alert_rules = (
AlertRules(None) if self._skip_alert_topology_labeling else AlertRules(self.topology)
)
alert_rules.add_path(self._alert_rules_path, recursive=self._recursive)
if self._forward_alert_rules:
alert_rules.add_path(self._alert_rules_path, recursive=self._recursive)
alert_rules_as_dict = alert_rules.as_dict()

relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict())
Expand Down Expand Up @@ -1617,6 +1621,9 @@ def __init__(
alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
recursive: bool = True,
skip_alert_topology_labeling: bool = False,
*,
refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
forward_alert_rules: bool = True,
):
"""Construct a Loki charm client.

Expand All @@ -1642,6 +1649,9 @@ def __init__(
alert_rules_path: a string indicating a path where alert rules can be found
recursive: Whether to scan for rule files recursively.
skip_alert_topology_labeling: whether to skip the alert topology labeling.
forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules.
refresh_event: an optional bound event or list of bound events which
will be observed to re-set scrape job data (IP address and others)

Raises:
RelationNotFoundError: If there is no relation in the charm's metadata.yaml
Expand All @@ -1667,14 +1677,26 @@ def __init__(
charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires
)
super().__init__(
charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling
charm,
relation_name,
alert_rules_path,
recursive,
skip_alert_topology_labeling,
forward_alert_rules=forward_alert_rules,
)
events = self._charm.on[relation_name]
self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event)
self.framework.observe(self._charm.on.config_changed, self._on_lifecycle_event)
self.framework.observe(events.relation_joined, self._on_logging_relation_joined)
self.framework.observe(events.relation_changed, self._on_logging_relation_changed)
self.framework.observe(events.relation_departed, self._on_logging_relation_departed)

if refresh_event:
if not isinstance(refresh_event, list):
refresh_event = [refresh_event]
for ev in refresh_event:
self.framework.observe(ev, self._on_lifecycle_event)

def _on_lifecycle_event(self, _: HookEvent):
"""Update require relation data on charm upgrades and other lifecycle events.

Expand Down Expand Up @@ -2550,10 +2572,17 @@ def __init__(
alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
recursive: bool = True,
skip_alert_topology_labeling: bool = False,
refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
forward_alert_rules: bool = True,
):
_PebbleLogClient.check_juju_version()
super().__init__(
charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling
charm,
relation_name,
alert_rules_path,
recursive,
skip_alert_topology_labeling,
forward_alert_rules=forward_alert_rules,
)
self._charm = charm
self._relation_name = relation_name
Expand All @@ -2564,6 +2593,12 @@ def __init__(
self.framework.observe(on.relation_departed, self._update_logging)
self.framework.observe(on.relation_broken, self._update_logging)

if refresh_event:
if not isinstance(refresh_event, list):
refresh_event = [refresh_event]
for ev in refresh_event:
self.framework.observe(ev, self._update_logging)

for container_name in self._charm.meta.containers.keys():
snake_case_container_name = container_name.replace("-", "_")
self.framework.observe(
Expand Down
59 changes: 48 additions & 11 deletions lib/charms/prometheus_k8s/v0/prometheus_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ def _on_scrape_targets_changed(self, event):

import yaml
from cosl import JujuTopology
from cosl.rules import AlertRules
from ops.charm import CharmBase, RelationRole
from cosl.rules import AlertRules, generic_alert_groups
from ops.charm import CharmBase, RelationJoinedEvent, RelationRole
from ops.framework import (
BoundEvent,
EventBase,
Expand All @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 48
LIBPATCH = 50

PYDEPS = ["cosl"]

Expand Down Expand Up @@ -1309,6 +1309,8 @@ def __init__(
refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
external_url: str = "",
lookaside_jobs_callable: Optional[Callable] = None,
*,
forward_alert_rules: bool = True,
):
"""Construct a metrics provider for a Prometheus charm.

Expand Down Expand Up @@ -1411,6 +1413,7 @@ def __init__(
files. Defaults to "./prometheus_alert_rules",
resolved relative to the directory hosting the charm entry file.
The alert rules are automatically updated on charm upgrade.
forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules.
refresh_event: an optional bound event or list of bound events which
will be observed to re-set scrape job data (IP address and others)
external_url: an optional argument that represents an external url that
Expand Down Expand Up @@ -1449,6 +1452,7 @@ def __init__(

self._charm = charm
self._alert_rules_path = alert_rules_path
self._forward_alert_rules = forward_alert_rules
self._relation_name = relation_name
# sanitize job configurations to the supported subset of parameters
jobs = [] if jobs is None else jobs
Expand Down Expand Up @@ -1530,7 +1534,11 @@ def set_scrape_job_spec(self, _=None):
return

alert_rules = AlertRules(query_type="promql", topology=self.topology)
alert_rules.add_path(self._alert_rules_path, recursive=True)
if self._forward_alert_rules:
alert_rules.add_path(self._alert_rules_path, recursive=True)
alert_rules.add(
generic_alert_groups.application_rules, group_name_prefix=self.topology.identifier
)
alert_rules_as_dict = alert_rules.as_dict()

for relation in self._charm.model.relations[self._relation_name]:
Expand Down Expand Up @@ -1776,6 +1784,9 @@ def __init__(
relation_names: Optional[dict] = None,
relabel_instance=True,
resolve_addresses=False,
path_to_own_alert_rules: Optional[str] = None,
*,
forward_alert_rules: bool = True,
):
"""Construct a `MetricsEndpointAggregator`.

Expand All @@ -1795,6 +1806,8 @@ def __init__(
resolve_addresses: A boolean flag indicating if the aggregator
should attempt to perform DNS lookups of targets and append
a `dns_name` label
path_to_own_alert_rules: Optionally supply a path for alert rule files
forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules
"""
self._charm = charm

Expand All @@ -1807,15 +1820,21 @@ def __init__(
self._alert_rules_relation = relation_names.get("alert_rules", "prometheus-rules")

super().__init__(charm, self._prometheus_relation)
self.topology = JujuTopology.from_charm(charm)

self._stored.set_default(jobs=[], alert_rules=[])

self._relabel_instance = relabel_instance
self._resolve_addresses = resolve_addresses

self._forward_alert_rules = forward_alert_rules

# manage Prometheus charm relation events
prometheus_events = self._charm.on[self._prometheus_relation]
self.framework.observe(prometheus_events.relation_joined, self._set_prometheus_data)

self.path_to_own_alert_rules = path_to_own_alert_rules

# manage list of Prometheus scrape jobs from related scrape targets
target_events = self._charm.on[self._target_relation]
self.framework.observe(target_events.relation_changed, self._on_prometheus_targets_changed)
Expand All @@ -1828,7 +1847,7 @@ def __init__(
self.framework.observe(alert_rule_events.relation_changed, self._on_alert_rules_changed)
self.framework.observe(alert_rule_events.relation_departed, self._on_alert_rules_departed)

def _set_prometheus_data(self, event):
def _set_prometheus_data(self, event: Optional[RelationJoinedEvent] = None):
"""Ensure every new Prometheus instance is updated.

Any time a new Prometheus unit joins the relation with
Expand All @@ -1838,6 +1857,7 @@ def _set_prometheus_data(self, event):
if not self._charm.unit.is_leader():
return

# Gather the scrape jobs
jobs = [] + _type_convert_stored(
self._stored.jobs # pyright: ignore
) # list of scrape jobs, one per relation
Expand All @@ -1846,6 +1866,7 @@ def _set_prometheus_data(self, event):
if targets and relation.app:
jobs.append(self._static_scrape_job(targets, relation.app.name))

# Gather the alert rules
groups = [] + _type_convert_stored(
self._stored.alert_rules # pyright: ignore
) # list of alert rule groups
Expand All @@ -1856,9 +1877,23 @@ def _set_prometheus_data(self, event):
rules = self._label_alert_rules(unit_rules, appname)
group = {"name": self.group_name(appname), "rules": rules}
groups.append(group)

event.relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs)
event.relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups})
alert_rules = AlertRules(query_type="promql", topology=self.topology)
# Add alert rules from file
if self.path_to_own_alert_rules:
alert_rules.add_path(self.path_to_own_alert_rules, recursive=True)
# Add generic alert rules
alert_rules.add(
generic_alert_groups.application_rules, group_name_prefix=self.topology.identifier
)
groups.extend(alert_rules.as_dict()["groups"])

# Set scrape jobs and alert rules in relation data
relations = [event.relation] if event else self.model.relations[self._prometheus_relation]
for rel in relations:
rel.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) # type: ignore
rel.data[self._charm.app]["alert_rules"] = json.dumps( # type: ignore
{"groups": groups if self._forward_alert_rules else []}
)

def _on_prometheus_targets_changed(self, event):
"""Update scrape jobs in response to scrape target changes.
Expand Down Expand Up @@ -2129,7 +2164,9 @@ def set_alert_rule_data(self, name: str, unit_rules: dict, label_rules: bool = T

if updated_group["name"] not in [g["name"] for g in groups]:
groups.append(updated_group)
relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups})
relation.data[self._charm.app]["alert_rules"] = json.dumps(
{"groups": groups if self._forward_alert_rules else []}
)

if not _type_convert_stored(self._stored.alert_rules) == groups: # pyright: ignore
self._stored.alert_rules = groups
Expand Down Expand Up @@ -2177,8 +2214,8 @@ def remove_alert_rules(self, group_name: str, unit_name: str) -> None:
changed_group["rules"] = rules_kept # type: ignore
groups.append(changed_group)

relation.data[self._charm.app]["alert_rules"] = (
json.dumps({"groups": groups}) if groups else "{}"
relation.data[self._charm.app]["alert_rules"] = json.dumps(
{"groups": groups if self._forward_alert_rules else []}
)

if not _type_convert_stored(self._stored.alert_rules) == groups: # pyright: ignore
Expand Down
Loading
Loading