Add more checks to labels/conflict
prymitive committed Mar 11, 2024
1 parent 239bac2 commit 9dbc0c2
Showing 4 changed files with 149 additions and 16 deletions.
5 changes: 5 additions & 0 deletions docs/changelog.md
@@ -2,6 +2,11 @@

## v0.55.0

### Added

- [labels/conflict](checks/labels/conflict.md) check will now warn if an alerting
  rule sets labels that are already configured as external labels.

### Fixed

- Fixed false positive reports from [rule/duplicate](checks/rule/duplicate.md) when
48 changes: 45 additions & 3 deletions docs/checks/labels/conflict.md
@@ -11,10 +11,52 @@ Below is the list of conflicts it looks for.

## External labels

### Recording rules

If a recording rule manually sets labels that are already present in the
`external_labels` Prometheus configuration option, then the two values might
conflict when metrics are ingested into another Prometheus via federation or
remote read/write.

Example:

Consider this recording rule:

```yaml
groups:
  - name: recording rules
    rules:
      - record: prometheus_http_requests_total:rate2m
        expr: rate(prometheus_http_requests_total[2m])
        labels:
          cluster: dev
```
If this rule is deployed to a Prometheus server with this configuration:
```yaml
global:
  external_labels:
    site: site01
    cluster: staging
```
Then a `/federate` request will return time series with the `cluster="staging"` label,
except for the `prometheus_http_requests_total:rate2m` time series, which will have the
`cluster="dev"` label from the recording rule instead, and that can cause unexpected
inconsistencies.
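To illustrate, a `/federate` scrape of the server above could then return series like
these (sample values are made up):

```
prometheus_http_requests_total{handler="/metrics",site="site01",cluster="staging"} 1027
prometheus_http_requests_total:rate2m{handler="/metrics",site="site01",cluster="dev"} 0.2
```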

If both the recording rule and the `external_labels` config section use the same value for
the `cluster` label then the label set on the rule is effectively redundant, so in both cases
it's best to avoid setting labels that are already used in `external_labels` on individual rules.
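For example, with the `external_labels` shown above, a hypothetical rule that sets the same
value only duplicates configuration:

```yaml
groups:
  - name: recording rules
    rules:
      - record: prometheus_http_requests_total:rate2m
        expr: rate(prometheus_http_requests_total[2m])
        labels:
          cluster: staging # same value as in external_labels, so the label is redundant
```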

### Alerting rules

The same problem exists for alerting rules, the only difference being how the label is used.
Any label listed in `external_labels` will be added to all firing alerts.
Setting the `cluster` label on an alerting rule will override the value from `external_labels`
and can cause confusion when an alert sent from the `cluster="staging"` Prometheus has the
`cluster="dev"` label set.
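As a sketch, a hypothetical alerting rule like the one below, deployed to the
`cluster="staging"` Prometheus configured above, would fire with `cluster="dev"` instead:

```yaml
groups:
  - name: alerting rules
    rules:
      - alert: PrometheusHighErrorRate
        expr: rate(prometheus_http_requests_total{code=~"5.."}[5m]) > 0
        labels:
          cluster: dev # overrides cluster="staging" from external_labels on this alert
```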

## Configuration

26 changes: 18 additions & 8 deletions internal/checks/labels_conflict.go
@@ -42,27 +42,30 @@ func (c LabelsConflictCheck) Reporter() string {
}

func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ []discovery.Entry) (problems []Problem) {
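	// Pick the labels block from the rule, whether it is an alerting or a recording rule;
	// rules with a syntax error or no labels at all are skipped.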
	var labels *parser.YamlMap
	if rule.AlertingRule != nil && rule.AlertingRule.Expr.SyntaxError == nil && rule.AlertingRule.Labels != nil {
		labels = rule.AlertingRule.Labels
	}
	if rule.RecordingRule != nil && rule.RecordingRule.Expr.SyntaxError == nil && rule.RecordingRule.Labels != nil {
		labels = rule.RecordingRule.Labels
	}
	if labels == nil {
		return problems
	}

	cfg, err := c.prom.Config(ctx)
	if err != nil {
		text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning)
		problems = append(problems, Problem{
			Lines:    labels.Lines,
			Reporter: c.Reporter(),
			Text:     text,
			Severity: severity,
		})
		return problems
	}

	for _, label := range labels.Items {
		for k, v := range cfg.Config.Global.ExternalLabels {
			if label.Key.Value == k {
				problems = append(problems, Problem{
@@ -71,7 +74,7 @@ func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ []discovery.Entry) (problems []Problem) {
					Last:  label.Value.Lines.Last,
				},
				Reporter: c.Reporter(),
				Text:     c.formatText(k, label.Value.Value, v, rule.Type(), cfg),
				Details:  fmt.Sprintf("[Click here](%s/config) to see `%s` Prometheus runtime configuration.", cfg.PublicURI, c.prom.Name()),
				Severity: Warning,
			})
@@ -81,3 +84,10 @@ func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ []discovery.Entry) (problems []Problem) {

	return problems
}

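// formatText picks the problem message: an alerting rule that repeats an external label
// with the exact same value gets the "redundant label" message, any other conflict gets
// the generic "choose a different name" message.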
func (c LabelsConflictCheck) formatText(k, lv, ev string, kind parser.RuleType, cfg *promapi.ConfigResult) string {
	if (lv == ev) && kind == parser.AlertingRuleType {
		return fmt.Sprintf("This label is redundant. %s external_labels already has %s=%q label set and it will be automatically added to all alerts, there's no need to set it manually.", promText(c.prom.Name(), cfg.URI), k, ev)
	}
	return fmt.Sprintf("%s external_labels already has %s=%q label set, please choose a different name for this label to avoid any conflicts.", promText(c.prom.Name(), cfg.URI), k, ev)
}
86 changes: 81 additions & 5 deletions internal/checks/labels_conflict_test.go
@@ -10,10 +10,14 @@ import (
"github.com/cloudflare/pint/internal/promapi"
)

func textExternalLabelsRR(name, uri, k, v string) string {
return fmt.Sprintf("`%s` Prometheus server at %s external_labels already has %s=%q label set, please choose a different name for this label to avoid any conflicts.", name, uri, k, v)
}

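// textExternalLabelsAR mirrors the message emitted when an alerting rule repeats an external label with the same value.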
func textExternalLabelsAR(name, uri, k, v string) string {
return fmt.Sprintf("This label is redundant. `%s` Prometheus server at %s external_labels already has %s=%q label set and it will be automatically added to all alerts, there's no need to set it manually.", name, uri, k, v)
}

func newLabelsConflict(prom *promapi.FailoverGroup) checks.RuleChecker {
return checks.NewLabelsConflictCheck(prom)
}
@@ -35,12 +39,19 @@ func TestLabelsConflictCheck(t *testing.T) {
problems: noProblems,
},
{
description: "no labels",
description: "no labels / recording",
content: "- record: foo\n expr: up == 0\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
problems: noProblems,
},
{
description: "no labels / alerting",
content: "- alert: foo\n expr: up == 0\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
problems: noProblems,
},
{
description: "connection refused",
content: "- record: foo\n expr: up == 0\n labels:\n foo: bar\n",
@@ -63,7 +74,7 @@ func TestLabelsConflictCheck(t *testing.T) {
},
},
{
description: "conflict",
description: "conflict / recording",
content: "- record: foo\n expr: up == 0\n labels:\n foo: bar\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
@@ -75,7 +86,33 @@ func TestLabelsConflictCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.LabelsConflictCheckName,
Text: textExternalLabels("prom", uri, "foo", "bob"),
Text: textExternalLabelsRR("prom", uri, "foo", "bob"),
Details: alertsExternalLabelsDetails("prom", uri),
Severity: checks.Warning,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
resp: configResponse{yaml: "global:\n external_labels:\n foo: bob\n"},
},
},
},
{
description: "conflict / alerting / different",
content: "- alert: foo\n expr: up == 0\n labels:\n foo: bar\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Lines: parser.LineRange{
First: 4,
Last: 4,
},
Reporter: checks.LabelsConflictCheckName,
Text: textExternalLabelsRR("prom", uri, "foo", "bob"),
Details: alertsExternalLabelsDetails("prom", uri),
Severity: checks.Warning,
},
@@ -89,7 +126,33 @@ func TestLabelsConflictCheck(t *testing.T) {
},
},
{
description: "no conflict",
description: "conflict / alerting / identical",
content: "- alert: foo\n expr: up == 0\n labels:\n foo: bar\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Lines: parser.LineRange{
First: 4,
Last: 4,
},
Reporter: checks.LabelsConflictCheckName,
Text: textExternalLabelsAR("prom", uri, "foo", "bar"),
Details: alertsExternalLabelsDetails("prom", uri),
Severity: checks.Warning,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
resp: configResponse{yaml: "global:\n external_labels:\n foo: bar\n"},
},
},
},
{
description: "no conflict / recording",
content: "- record: foo\n expr: up == 0\n labels:\n foo: bar\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
@@ -101,6 +164,19 @@ func TestLabelsConflictCheck(t *testing.T) {
},
},
},
{
description: "no conflict / alerting",
content: "- alert: foo\n expr: up == 0\n labels:\n foo: bar\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
resp: configResponse{yaml: "global:\n external_labels:\n bob: bob\n"},
},
},
},
}

runTests(t, testCases)
