Skip to content

Commit

Permalink
Merge pull request #1279 from cloudflare/series
Browse files Browse the repository at this point in the history
Improve logic of promql/series
  • Loading branch information
prymitive authored Feb 11, 2025
2 parents f02d8df + bd87614 commit 313e8a2
Show file tree
Hide file tree
Showing 9 changed files with 1,159 additions and 1,377 deletions.
21 changes: 1 addition & 20 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,7 @@ jobs:
- name: Check for local changes
run: git diff --exit-code

# Codecov reporting is unreliable.
# Re-run report 3 times to have a better
# chance of success.
- name: Report code coverage (1)
- name: Report code coverage
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3 # v5.3.1
with:
token: ${{ secrets.CODECOV_TOKEN }}
Expand All @@ -43,19 +40,3 @@ jobs:
handle_no_reports_found: true
continue-on-error: true

- name: Report code coverage (2)
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3 # v5.3.1
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./.cover/coverage.out
fail_ci_if_error: true
handle_no_reports_found: true
continue-on-error: true

- name: Report code coverage (3)
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3 # v5.3.1
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./.cover/coverage.out
fail_ci_if_error: true
handle_no_reports_found: true
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@
- Added `names` option to the `parser` block, which controls how does Prometheus validates
label names.

### Fixed

- Improved the logic of [promql/series](checks/promql/series.md) check to skip checks on some selectors
which absence might be expected. For example using `foo unless bar` will now only have `foo`
tested while `bar` is ignored by this check. This is because the query implies that `bar`
might be present or missing and depending on that `foo` is evaluated.

## v0.70.0

### Added
Expand Down
15 changes: 15 additions & 0 deletions docs/checks/promql/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ to determine why, by checking if:
- `my_metric` has any series with `foo` label
- `my_metric` has any series matching `foo="bar"`

## Ignored cases

### vector() fallback

Metrics that are wrapped in `... or vector(0)` won't be checked, since
the intention of adding `or vector(0)` is to provide a fallback value
when there are no matching time series.
Expand All @@ -31,6 +35,17 @@ Example:
expr: sum(my_metric or vector(0)) > 1
```
### foo unless bar
In a query using `foo unless bar` only `foo` selector will be tested, since
this query implies that the presence of `bar` is not guaranteed.
`bar` is only tested for being present and so it's likely that it might be missing.

Note that using `foo unless bar > 5` doesn't follow the same pattern and will have
both `foo` and `bar` tested by this check
Having `bar > 5` means it's assumed to be always present as we're testing it's value,
rather than it's existence.

## Common problems

If you see this check complaining about some metric it's might due to a number
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/alerts_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ func checkQueryLabels(query, labelName, labelValue string, src []utils.Source) (
if s.IsDead {
continue
}
if s.FixedLabels && !slices.Contains(s.IncludedLabels, v[1]) {
if s.FixedLabels && !slices.Contains(s.IncludedLabels, v[1]) && !slices.Contains(s.GuaranteedLabels, v[1]) {
problems = append(problems, textForProblem(query, v[1], "", s, Bug))
goto NEXT
}
Expand Down
6 changes: 3 additions & 3 deletions internal/checks/promql_rate.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
if src.Type != utils.AggregateSource {
continue
}
for _, vs := range src.Selectors {
metadata, err := c.prom.Metadata(ctx, vs.Name)
if src.Selector != nil {
metadata, err := c.prom.Metadata(ctx, src.Selector.Name)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
continue
Expand Down Expand Up @@ -186,7 +186,7 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
}
problems = append(problems, exprProblem{
text: fmt.Sprintf("`rate(%s(counter))` chain detected, `%s` is called here on results of `%s(%s)`.",
src.Operation, node.Expr, src.Operation, vs),
src.Operation, node.Expr, src.Operation, src.Selector),
details: fmt.Sprintf(
"You can only calculate `rate()` directly from a counter metric. "+
"Calling `rate()` on `%s()` results will return bogus results because `%s()` will hide information on when each counter resets. "+
Expand Down
91 changes: 53 additions & 38 deletions internal/checks/promql_series.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
selectors := utils.HasVectorSelector(expr.Query)

done := map[string]bool{}
for _, selector := range getNonFallbackSelectors(expr.Query) {
for _, selector := range getNonFallbackSelectors(expr) {
if _, ok := done[selector.String()]; ok {
continue
}
Expand Down Expand Up @@ -186,7 +186,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
if arEntry != nil {
slog.Debug(
"Metric is provided by alerting rule",
slog.String("selector", (&selector).String()),
slog.String("selector", selector.String()),
slog.String("path", arEntry.Path.Name),
)
} else {
Expand Down Expand Up @@ -218,14 +218,14 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
}

// 1. If foo{bar, baz} is there -> GOOD
slog.Debug("Checking if selector returns anything", slog.String("check", c.Reporter()), slog.String("selector", (&selector).String()))
slog.Debug("Checking if selector returns anything", slog.String("check", c.Reporter()), slog.String("selector", selector.String()))
count, err := c.instantSeriesCount(ctx, fmt.Sprintf("count(%s)", selector.String()))
if err != nil {
problems = append(problems, c.queryProblem(err, expr))
continue
}
if count > 0 {
slog.Debug("Found series, skipping further checks", slog.String("check", c.Reporter()), slog.String("selector", (&selector).String()))
slog.Debug("Found series, skipping further checks", slog.String("check", c.Reporter()), slog.String("selector", selector.String()))
continue
}

Expand Down Expand Up @@ -376,7 +376,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
slog.Debug(
"Series disappeared from prometheus but for less then configured min-age",
slog.String("check", c.Reporter()),
slog.String("selector", (&selector).String()),
slog.String("selector", selector.String()),
slog.String("min-age", output.HumanizeDuration(minAge)),
slog.String("last-seen", sinceDesc(newest(trs.Series.Ranges))),
)
Expand Down Expand Up @@ -444,7 +444,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
Severity: severity,
})
slog.Debug("No historical series matching filter used in the query",
slog.String("check", c.Reporter()), slog.String("selector", (&selector).String()), slog.String("matcher", lm.String()))
slog.String("check", c.Reporter()), slog.String("selector", selector.String()), slog.String("matcher", lm.String()))
continue
}

Expand Down Expand Up @@ -482,7 +482,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
slog.Debug(
"Series disappeared from prometheus but for less then configured min-age",
slog.String("check", c.Reporter()),
slog.String("selector", (&selector).String()),
slog.String("selector", selector.String()),
slog.String("min-age", output.HumanizeDuration(minAge)),
slog.String("last-seen", sinceDesc(newest(trsLabel.Series.Ranges))),
)
Expand All @@ -507,7 +507,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
slog.Debug(
"Series matching filter disappeared from prometheus",
slog.String("check", c.Reporter()),
slog.String("selector", (&selector).String()),
slog.String("selector", selector.String()),
slog.String("matcher", lm.String()),
)
continue
Expand All @@ -528,7 +528,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
slog.Debug(
"Series matching filter are only sometimes present",
slog.String("check", c.Reporter()),
slog.String("selector", (&selector).String()),
slog.String("selector", selector.String()),
slog.String("matcher", lm.String()),
)
}
Expand Down Expand Up @@ -686,7 +686,7 @@ func (c SeriesCheck) instantSeriesCount(ctx context.Context, query string) (int,
return series, nil
}

func (c SeriesCheck) getMinAge(rule parser.Rule, selector promParser.VectorSelector) (minAge time.Duration, problems []Problem) {
func (c SeriesCheck) getMinAge(rule parser.Rule, selector *promParser.VectorSelector) (minAge time.Duration, problems []Problem) {
minAge = time.Hour * 2
for _, ruleSet := range comments.Only[comments.RuleSet](rule.Comments, comments.RuleSetType) {
matcher, key, value := parseRuleSet(ruleSet.Value)
Expand Down Expand Up @@ -716,7 +716,7 @@ func (c SeriesCheck) getMinAge(rule parser.Rule, selector promParser.VectorSelec
return minAge, problems
}

func (c SeriesCheck) isLabelValueIgnored(settings *PromqlSeriesSettings, rule parser.Rule, selector promParser.VectorSelector, labelName string) bool {
func (c SeriesCheck) isLabelValueIgnored(settings *PromqlSeriesSettings, rule parser.Rule, selector *promParser.VectorSelector, labelName string) bool {
for matcher, names := range settings.IgnoreLabelsValue {
isMatch, _ := matchSelectorToMetric(selector, matcher)
if !isMatch {
Expand All @@ -739,7 +739,7 @@ func (c SeriesCheck) isLabelValueIgnored(settings *PromqlSeriesSettings, rule pa
}
}
if labelName == value {
slog.Debug("Label check disabled by comment", slog.String("selector", (&selector).String()), slog.String("label", labelName))
slog.Debug("Label check disabled by comment", slog.String("selector", selector.String()), slog.String("label", labelName))
return true
}
}
Expand All @@ -760,36 +760,51 @@ func (c SeriesCheck) textAndSeverity(settings *PromqlSeriesSettings, name, text
return text, s
}

func getNonFallbackSelectors(n *parser.PromQLNode) (selectors []promParser.VectorSelector) {
LOOP:
for _, vs := range parser.WalkDownExpr[*promParser.VectorSelector](n) {
for _, bin := range parser.WalkUpExpr[*promParser.BinaryExpr](vs.Parent) {
if binExp := bin.Expr.(*promParser.BinaryExpr); binExp.Op != promParser.LOR {
continue
func selectorWithoutOffset(vs *promParser.VectorSelector) *promParser.VectorSelector {
if vs.OriginalOffset == 0 {
return vs
}

s := &promParser.VectorSelector{}
*s = *vs
s.Offset = 0
s.OriginalOffset = 0
return s
}

func getNonFallbackSelectors(n parser.PromQLExpr) (selectors []*promParser.VectorSelector) {
var hasVectorFallback bool
sources := utils.LabelsSource(n.Value.Value, n.Query.Expr)
for _, ls := range sources {
if ls.AlwaysReturns && len(ls.ReturnedNumbers) > 0 {
hasVectorFallback = true
break
}
}
for _, ls := range sources {
if !hasVectorFallback {
if ls.Selector != nil {
selectors = append(selectors, selectorWithoutOffset(ls.Selector))
}
for _, vec := range parser.WalkDownExpr[*promParser.Call](bin) {
if vecCall := vec.Expr.(*promParser.Call); vecCall.Func.Name == "vector" {
// vector seletor is under (...) OR vector()
// ignore it
slog.Debug(
"Metric uses vector() fallback value, ignore",
slog.String("selector", (vs.Expr.String())),
)
continue LOOP
}
}
for _, js := range ls.Joins {
if js.Selector != nil {
selectors = append(selectors, selectorWithoutOffset(js.Selector))
}
}
// copy node without offset
nc := promParser.VectorSelector{
Name: vs.Expr.(*promParser.VectorSelector).Name,
LabelMatchers: vs.Expr.(*promParser.VectorSelector).LabelMatchers,
for _, us := range ls.Unless {
if !us.IsConditional {
continue
}
if us.Selector != nil {
selectors = append(selectors, selectorWithoutOffset(us.Selector))
}
}
selectors = append(selectors, nc)
}
return selectors
}

func stripLabels(selector promParser.VectorSelector) promParser.VectorSelector {
func stripLabels(selector *promParser.VectorSelector) promParser.VectorSelector {
s := promParser.VectorSelector{
Name: selector.Name,
LabelMatchers: []*labels.Matcher{},
Expand All @@ -805,7 +820,7 @@ func stripLabels(selector promParser.VectorSelector) promParser.VectorSelector {
return s
}

func isDisabled(rule parser.Rule, selector promParser.VectorSelector) bool {
func isDisabled(rule parser.Rule, selector *promParser.VectorSelector) bool {
for _, disable := range comments.Only[comments.Disable](rule.Comments, comments.DisableType) {
if strings.HasPrefix(disable.Match, SeriesCheckName+"(") && strings.HasSuffix(disable.Match, ")") {
cs := strings.TrimSuffix(strings.TrimPrefix(disable.Match, SeriesCheckName+"("), ")")
Expand All @@ -823,7 +838,7 @@ func isDisabled(rule parser.Rule, selector promParser.VectorSelector) bool {
return false
}

func matchSelectorToMetric(selector promParser.VectorSelector, metric string) (bool, bool) {
func matchSelectorToMetric(selector *promParser.VectorSelector, metric string) (bool, bool) {
// Try full string or name match first.
if metric == selector.String() || metric == selector.Name {
return true, true
Expand Down Expand Up @@ -889,7 +904,7 @@ func orphanedDisableComments(ctx context.Context, rule parser.Rule, selectors []
continue
}
for _, selector := range selectors {
isMatch, ok := matchSelectorToMetric(selector, match)
isMatch, ok := matchSelectorToMetric(&selector, match)
if !ok {
continue
}
Expand All @@ -912,7 +927,7 @@ func orphanedRuleSetComments(rule parser.Rule, selectors []promParser.VectorSele
matcher, key, value := parseRuleSet(ruleSet.Value)
for _, selector := range selectors {
if matcher != "" {
isMatch, _ := matchSelectorToMetric(selector, matcher)
isMatch, _ := matchSelectorToMetric(&selector, matcher)
if !isMatch {
continue
}
Expand Down
39 changes: 39 additions & 0 deletions internal/checks/promql_series_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4234,6 +4234,45 @@ func TestSeriesCheck(t *testing.T) {
},
},
},
{
description: "foo unless bar",
content: "- record: foo\n expr: foo unless bar\n",
checker: newSeriesCheck,
prometheus: newSimpleProm,
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireQueryPath,
formCond{key: "query", value: "count(foo)"},
},
resp: respondWithSingleInstantVector(),
},
},
},
{
description: "foo unless bar > 5",
content: "- record: foo\n expr: foo unless bar > 5\n",
checker: newSeriesCheck,
prometheus: newSimpleProm,
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireQueryPath,
formCond{key: "query", value: "count(foo)"},
},
resp: respondWithSingleInstantVector(),
},
{
conds: []requestCondition{
requireQueryPath,
formCond{key: "query", value: "count(bar)"},
},
resp: respondWithSingleInstantVector(),
},
},
},
}
runTests(t, testCases)
}
Loading

0 comments on commit 313e8a2

Please sign in to comment.