diff --git a/docs/changelog.md b/docs/changelog.md
index f7c58e35..1c478ba7 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -2,6 +2,11 @@
 
 ## v0.55.0
 
+### Added
+
+- [labels/conflict](checks/labels/conflict.md) check will now warn if an alerting
+  rule sets labels that are already configured as external labels.
+
 ### Fixed
 
 - Fixed false positive reports from [rule/duplicate](checks/rule/duplicate.md) when
diff --git a/docs/checks/labels/conflict.md b/docs/checks/labels/conflict.md
index a5517e14..da4594d7 100644
--- a/docs/checks/labels/conflict.md
+++ b/docs/checks/labels/conflict.md
@@ -11,10 +11,52 @@ Below is the list of conflicts it looks for.
 
 ## External labels
 
-If recording rules are manually setting some labels that are
+### Recording rules
+
+If any recording rules are manually setting some labels that are
 already present in `external_labels` Prometheus configuration option
-then both labels might conflict when metrics are federated or when sending
-alerts.
+then both labels might conflict when metrics are ingested into another
+Prometheus via federation or remote read/write.
+
+Example:
+
+Consider this recording rule:
+
+```yaml
+groups:
+  - name: recording rules
+    rules:
+      - record: prometheus_http_requests_total:rate2m
+        expr: rate(prometheus_http_requests_total[2m])
+        labels:
+          cluster: dev
+```
+
+If this rule is deployed to a Prometheus server with this configuration:
+
+```yaml
+global:
+  external_labels:
+    site: site01
+    cluster: staging
+```
+
+Then a `/federate` request will return time series with the `cluster="staging"` label,
+except for the `prometheus_http_requests_total:rate2m` time series, which will have the `cluster="dev"`
+label from the recording rule; this might cause unexpected inconsistencies.
+
+If both the recording rule and the `external_labels` config section use the same value for the
+`cluster` label then this effectively makes the `cluster` label redundant, so in both cases it's
+best to avoid setting labels used in `external_labels` on individual rules.
+
+### Alerting rules
+
+The same problem exists for alerting rules, with the only difference being how the label is
+used.
+Any label listed in `external_labels` will be added to all firing alerts.
+Setting the `cluster` label on an alerting rule will override `external_labels` and
+can cause confusion when an alert sent from the `cluster="staging"` Prometheus has the `cluster="dev"`
+label set.
 
 ## Configuration
 
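To make the new alerting-rule case concrete: assuming the same `external_labels` as in the documentation example above (`cluster: staging`), a hypothetical alerting rule like the one below would now be flagged by the labels/conflict check, because its `cluster` label would override the external label on every alert it fires.

```yaml
groups:
  - name: alerting rules
    rules:
      # Hypothetical alert used only to illustrate the documented behaviour.
      - alert: PrometheusHighRequestRate
        expr: rate(prometheus_http_requests_total[2m]) > 100
        for: 5m
        labels:
          cluster: dev
```

If the rule used `cluster: staging` instead, the value would match `external_labels` and, per the `formatText` helper added in `labels_conflict.go` below, the check would report the label as redundant rather than conflicting.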
diff --git a/internal/checks/labels_conflict.go b/internal/checks/labels_conflict.go
index 8c273f3f..35c2c44a 100644
--- a/internal/checks/labels_conflict.go
+++ b/internal/checks/labels_conflict.go
@@ -42,19 +42,22 @@ func (c LabelsConflictCheck) Reporter() string {
 }
 
 func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ []discovery.Entry) (problems []Problem) {
-	if rule.RecordingRule == nil || rule.RecordingRule.Expr.SyntaxError != nil {
-		return nil
+	var labels *parser.YamlMap
+	if rule.AlertingRule != nil && rule.AlertingRule.Expr.SyntaxError == nil && rule.AlertingRule.Labels != nil {
+		labels = rule.AlertingRule.Labels
 	}
-
-	if rule.RecordingRule.Labels == nil {
-		return nil
+	if rule.RecordingRule != nil && rule.RecordingRule.Expr.SyntaxError == nil && rule.RecordingRule.Labels != nil {
+		labels = rule.RecordingRule.Labels
+	}
+	if labels == nil {
+		return problems
 	}
 
 	cfg, err := c.prom.Config(ctx)
 	if err != nil {
 		text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning)
 		problems = append(problems, Problem{
-			Lines:    rule.RecordingRule.Labels.Lines,
+			Lines:    labels.Lines,
 			Reporter: c.Reporter(),
 			Text:     text,
 			Severity: severity,
@@ -62,7 +65,7 @@ func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Ru
 		return problems
 	}
 
-	for _, label := range rule.RecordingRule.Labels.Items {
+	for _, label := range labels.Items {
 		for k, v := range cfg.Config.Global.ExternalLabels {
 			if label.Key.Value == k {
 				problems = append(problems, Problem{
@@ -71,7 +74,7 @@ func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Ru
 						Last:  label.Value.Lines.Last,
 					},
 					Reporter: c.Reporter(),
-					Text:     fmt.Sprintf("%s external_labels already has %s=%q label set, please choose a different name for this label to avoid any conflicts.", promText(c.prom.Name(), cfg.URI), k, v),
+					Text:     c.formatText(k, label.Value.Value, v, rule.Type(), cfg),
 					Details:  fmt.Sprintf("[Click here](%s/config) to see `%s` Prometheus runtime configuration.", cfg.PublicURI, c.prom.Name()),
 					Severity: Warning,
 				})
@@ -81,3 +84,10 @@ func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Ru
 
 	return problems
 }
+
+func (c LabelsConflictCheck) formatText(k, lv, ev string, kind parser.RuleType, cfg *promapi.ConfigResult) string {
+	if (lv == ev) && kind == parser.AlertingRuleType {
+		return fmt.Sprintf("This label is redundant. %s external_labels already has %s=%q label set and it will be automatically added to all alerts, there's no need to set it manually.", promText(c.prom.Name(), cfg.URI), k, ev)
+	}
+	return fmt.Sprintf("%s external_labels already has %s=%q label set, please choose a different name for this label to avoid any conflicts.", promText(c.prom.Name(), cfg.URI), k, ev)
+}
diff --git a/internal/checks/labels_conflict_test.go b/internal/checks/labels_conflict_test.go
index a50dd9c2..d11470cd 100644
--- a/internal/checks/labels_conflict_test.go
+++ b/internal/checks/labels_conflict_test.go
@@ -10,10 +10,14 @@ import (
 	"github.com/cloudflare/pint/internal/promapi"
 )
 
-func textExternalLabels(name, uri, k, v string) string {
+func textExternalLabelsRR(name, uri, k, v string) string {
 	return fmt.Sprintf("`%s` Prometheus server at %s external_labels already has %s=%q label set, please choose a different name for this label to avoid any conflicts.", name, uri, k, v)
 }
 
+func textExternalLabelsAR(name, uri, k, v string) string {
+	return fmt.Sprintf("This label is redundant. `%s` Prometheus server at %s external_labels already has %s=%q label set and it will be automatically added to all alerts, there's no need to set it manually.", name, uri, k, v)
+}
+
 func newLabelsConflict(prom *promapi.FailoverGroup) checks.RuleChecker {
 	return checks.NewLabelsConflictCheck(prom)
 }
@@ -35,12 +39,19 @@ func TestLabelsConflictCheck(t *testing.T) {
 			problems:    noProblems,
 		},
 		{
-			description: "no labels",
+			description: "no labels / recording",
 			content:     "- record: foo\n  expr: up == 0\n",
 			checker:     newLabelsConflict,
 			prometheus:  newSimpleProm,
 			problems:    noProblems,
 		},
+		{
+			description: "no labels / alerting",
+			content:     "- alert: foo\n  expr: up == 0\n",
+			checker:     newLabelsConflict,
+			prometheus:  newSimpleProm,
+			problems:    noProblems,
+		},
 		{
 			description: "connection refused",
 			content:     "- record: foo\n  expr: up == 0\n  labels:\n    foo: bar\n",
@@ -63,7 +74,7 @@ func TestLabelsConflictCheck(t *testing.T) {
 			},
 		},
 		{
-			description: "conflict",
+			description: "conflict / recording",
 			content:     "- record: foo\n  expr: up == 0\n  labels:\n    foo: bar\n",
 			checker:     newLabelsConflict,
 			prometheus:  newSimpleProm,
@@ -75,7 +86,33 @@ func TestLabelsConflictCheck(t *testing.T) {
 						Last:  4,
 					},
 					Reporter: checks.LabelsConflictCheckName,
-					Text:     textExternalLabels("prom", uri, "foo", "bob"),
+					Text:     textExternalLabelsRR("prom", uri, "foo", "bob"),
+					Details:  alertsExternalLabelsDetails("prom", uri),
+					Severity: checks.Warning,
+				},
+			}
+		},
+		mocks: []*prometheusMock{
+			{
+				conds: []requestCondition{requireConfigPath},
+				resp:  configResponse{yaml: "global:\n  external_labels:\n    foo: bob\n"},
+			},
+		},
+	},
+	{
+		description: "conflict / alerting / different",
+		content:     "- alert: foo\n  expr: up == 0\n  labels:\n    foo: bar\n",
+		checker:     newLabelsConflict,
+		prometheus:  newSimpleProm,
+		problems: func(uri string) []checks.Problem {
+			return []checks.Problem{
+				{
+					Lines: parser.LineRange{
+						First: 4,
+						Last:  4,
+					},
+					Reporter: checks.LabelsConflictCheckName,
+					Text:     textExternalLabelsRR("prom", uri, "foo", "bob"),
 					Details:  alertsExternalLabelsDetails("prom", uri),
 					Severity: checks.Warning,
 				},
@@ -89,7 +126,33 @@ func TestLabelsConflictCheck(t *testing.T) {
 			},
 		},
 		{
-			description: "no conflict",
+			description: "conflict / alerting / identical",
+			content:     "- alert: foo\n  expr: up == 0\n  labels:\n    foo: bar\n",
+			checker:     newLabelsConflict,
+			prometheus:  newSimpleProm,
+			problems: func(uri string) []checks.Problem {
+				return []checks.Problem{
+					{
+						Lines: parser.LineRange{
+							First: 4,
+							Last:  4,
+						},
+						Reporter: checks.LabelsConflictCheckName,
+						Text:     textExternalLabelsAR("prom", uri, "foo", "bar"),
+						Details:  alertsExternalLabelsDetails("prom", uri),
+						Severity: checks.Warning,
+					},
+				}
+			},
+			mocks: []*prometheusMock{
+				{
+					conds: []requestCondition{requireConfigPath},
+					resp:  configResponse{yaml: "global:\n  external_labels:\n    foo: bar\n"},
+				},
+			},
+		},
+		{
+			description: "no conflict / recording",
 			content:     "- record: foo\n  expr: up == 0\n  labels:\n    foo: bar\n",
 			checker:     newLabelsConflict,
 			prometheus:  newSimpleProm,
@@ -101,6 +164,19 @@ func TestLabelsConflictCheck(t *testing.T) {
 				},
 			},
 		},
+		{
+			description: "no conflict / alerting",
+			content:     "- alert: foo\n  expr: up == 0\n  labels:\n    foo: bar\n",
+			checker:     newLabelsConflict,
+			prometheus:  newSimpleProm,
+			problems:    noProblems,
+			mocks: []*prometheusMock{
+				{
+					conds: []requestCondition{requireConfigPath},
+					resp:  configResponse{yaml: "global:\n  external_labels:\n    bob: bob\n"},
+				},
+			},
+		},
 	}
 
 	runTests(t, testCases