Skip to content

Commit

Permalink
Add watch rule_files subcommand
Browse files Browse the repository at this point in the history
  • Loading branch information
prymitive committed Mar 11, 2024
1 parent 0255f16 commit f798732
Show file tree
Hide file tree
Showing 12 changed files with 193 additions and 37 deletions.
1 change: 0 additions & 1 deletion cmd/pint/tests/0167_rule_duplicate_symlink.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ level=INFO msg="Configured new Prometheus server" name=prom2 uris=1 uptime=up ta
-- stderrV2.txt --
-- src/v0.yml --
groups:
groups:
- name: g1
rules:
- alert: DownAlert
Expand Down
43 changes: 43 additions & 0 deletions cmd/pint/tests/0168_watch_rule_files.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
http response prometheus /api/v1/status/config 200 {"status":"success","data":{"yaml":"rule_files:\n - rules/*\n"}}
http response prometheus /api/v1/metadata 200 {"status":"success","data":{}}
http response prometheus /api/v1/status/flags 200 {"status":"success","data":{"storage.tsdb.retention.time": "1d"}}
http response prometheus /api/v1/query_range 200 {"status":"success","data":{"resultType":"matrix","result":[]}}
http response prometheus /api/v1/query 200 {"status":"success","data":{"resultType":"vector","result":[{"metric":{},"value":[1666873962.795,"1"]}]}}
http start prometheus 127.0.0.1:7168

exec bash -x ./test.sh &

pint.ok --no-color -l debug watch --interval=5s --listen=127.0.0.1:6168 --pidfile=pint.pid rule_files prom
! stdout .

stderr 'level=DEBUG msg="Glob finder completed" count=2'
stderr 'level=DEBUG msg="Glob finder completed" count=3'

-- test.sh --
sleep 7
mv more/*.yaml rules/
sleep 7
cat pint.pid | xargs kill

-- rules/1.yaml --
groups:
- name: g1
rules:
- alert: DownAlert1
expr: up == 0
-- rules/2.yaml --
groups:
- name: g2
rules:
- alert: DownAlert2
expr: up == 0
-- more/3.yaml --
groups:
- name: g2
rules:
- alert: DownAlert2
expr: up == 0
-- .pint.hcl --
prometheus "prom" {
uri = "http://localhost:7168"
}
93 changes: 72 additions & 21 deletions cmd/pint/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,59 @@ var watchCmd = &cli.Command{
Usage: "Run in the foreground and continuesly check specified rules.",
Subcommands: []*cli.Command{
{
Name: "glob",
Usage: "Check a list of files or directories (can be a glob).",
Action: actionWatch,
Name: "glob",
Usage: "Check a list of files or directories (can be a glob).",
Action: func(c *cli.Context) error {
meta, err := actionSetup(c)
if err != nil {
return err
}

paths := c.Args().Slice()
if len(paths) == 0 {
return fmt.Errorf("at least one file or directory required")
}

slog.Debug("Starting glob watch", slog.Any("paths", paths))
return actionWatch(c, meta, func(_ context.Context) ([]string, error) {
return paths, nil
})
},
},
{
Name: "rule_files",
Usage: "Check the list of rule files from paths loaded by Prometheus.",
Action: func(c *cli.Context) error {
meta, err := actionSetup(c)
if err != nil {
return err
}

args := c.Args().Slice()
if len(args) != 1 {
return fmt.Errorf("exactly one argument required with the URI of Prometheus server to query")
}

gen := config.NewPrometheusGenerator(meta.cfg, prometheus.NewRegistry())
if err = gen.GenerateStatic(); err != nil {
return err
}

prom := gen.ServerWithName(args[0])
if prom == nil {
return fmt.Errorf("no Prometheus named %q configured in pint", args[0])
}

slog.Debug("Starting rule_fules watch", slog.String("name", args[0]))

return actionWatch(c, meta, func(ctx context.Context) ([]string, error) {
cfg, err := prom.Config(ctx, time.Millisecond)
if err != nil {
return nil, fmt.Errorf("failed to query %q Prometheus configuration: %w", prom.Name(), err)
}
return cfg.Config.RuleFiles, nil
})
},
},
},
Flags: []cli.Flag{
Expand Down Expand Up @@ -80,17 +130,7 @@ var watchCmd = &cli.Command{
},
}

func actionWatch(c *cli.Context) error {
meta, err := actionSetup(c)
if err != nil {
return err
}

paths := c.Args().Slice()
if len(paths) == 0 {
return fmt.Errorf("at least one file or directory required")
}

func actionWatch(c *cli.Context, meta actionMeta, f pathFinderFunc) error {
minSeverity, err := checks.ParseSeverity(c.String(minSeverityFlag))
if err != nil {
return fmt.Errorf("invalid --%s value: %w", minSeverityFlag, err)
Expand All @@ -114,7 +154,7 @@ func actionWatch(c *cli.Context) error {
}

// start HTTP server for metrics
collector := newProblemCollector(meta.cfg, paths, minSeverity, c.Int(maxProblemsFlag))
collector := newProblemCollector(meta.cfg, f, minSeverity, c.Int(maxProblemsFlag))
// register all metrics
metricsRegistry.MustRegister(collector)
metricsRegistry.MustRegister(checkDuration)
Expand Down Expand Up @@ -221,22 +261,22 @@ func startTimer(ctx context.Context, workers int, isOffline bool, gen *config.Pr
}

type problemCollector struct {

Check failure on line 263 in cmd/pint/watch.go

View workflow job for this annotation

GitHub Actions / format

16 bytes saved: struct with 168 pointer bytes could be 152
finder pathFinderFunc
cfg config.Config
fileOwners map[string]string
summary *reporter.Summary
problem *prometheus.Desc
problems *prometheus.Desc
fileOwnersMetric *prometheus.Desc
paths []string
minSeverity checks.Severity
maxProblems int
lock sync.Mutex
}

func newProblemCollector(cfg config.Config, paths []string, minSeverity checks.Severity, maxProblems int) *problemCollector {
func newProblemCollector(cfg config.Config, f pathFinderFunc, minSeverity checks.Severity, maxProblems int) *problemCollector {
return &problemCollector{
finder: f,
cfg: cfg,
paths: paths,
fileOwners: map[string]string{},
problem: prometheus.NewDesc(
"pint_problem",
Expand All @@ -262,8 +302,13 @@ func newProblemCollector(cfg config.Config, paths []string, minSeverity checks.S
}

func (c *problemCollector) scan(ctx context.Context, workers int, isOffline bool, gen *config.PrometheusGenerator) error {
slog.Info("Finding all rules to check", slog.Any("paths", c.paths))
entries, err := discovery.NewGlobFinder(c.paths, git.NewPathFilter(nil, nil, c.cfg.Parser.CompileRelaxed())).Find()
paths, err := c.finder(ctx)
if err != nil {
return fmt.Errorf("failed to get the list of paths to check: %w", err)
}

slog.Info("Finding all rules to check", slog.Any("paths", paths))
entries, err := discovery.NewGlobFinder(paths, git.NewPathFilter(nil, nil, c.cfg.Parser.CompileRelaxed())).Find()
if err != nil {
return err
}
Expand Down Expand Up @@ -310,7 +355,11 @@ func (c *problemCollector) Collect(ch chan<- prometheus.Metric) {

for _, report := range c.summary.Reports() {
if report.Problem.Severity < c.minSeverity {
slog.Debug("Skipping report", slog.String("severity", report.Problem.Severity.String()))
slog.Debug(
"Skipping report with severity lower than minimum configured",
slog.String("severity", report.Problem.Severity.String()),
slog.String("minimum", c.minSeverity.String()),
)
continue
}

Expand Down Expand Up @@ -363,3 +412,5 @@ func (c *problemCollector) Collect(ch chan<- prometheus.Metric) {
}
}
}

type pathFinderFunc func(ctx context.Context) ([]string, error)
5 changes: 5 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
- [labels/conflict](checks/labels/conflict.md) check will now warn if alerting
rule is setting labels that are already configured as external labels.

### Changed

- `pint watch` command now has two sub-commands: `pint watch glob` and `pint watch rule_files`.
See [watch mode](index.md#watch-mode) docs for details.

### Fixed

- Fixed false positive reports from [rule/duplicate](checks/rule/duplicate.md) when
Expand Down
51 changes: 47 additions & 4 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,17 +173,18 @@ then pint will also run "online" checks for it to, for example, ensure all
time series used inside your alerting rules are still present.
Example config:

```json
```js
prometheus "local" {
uri = "http://localhost:9090"
uri = "http://localhost:9090"
}
```

#### Getting list of files to check from Prometheus

You can also point pint directly at a Prometheus server from the config file.
This will make pint query Prometheus API to get the current value of `rule_files`
Prometheus config option and then run checks on all matching files.
On every iteration, before starting any checks, pint will query Prometheus API
to get the current value of `rule_files` Prometheus config option and then run
checks on all matching files.
This way if you test your rules against a running Prometheus instance then you don't
need to manually specify any paths or directories.

Expand Down Expand Up @@ -272,6 +273,48 @@ Here's an example alert you can use for problems detected by pint:

{% endraw %}

## YAML parser

By default pint will expect all Prometheus rule files to be following the exact
syntax Prometheus expects for YAML files containing [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
rules.
If you have Prometheus rules stored in YAML files with different YAML tree, but still
retain the same set of fields, for example:

```yaml
# Flat rule list
- alert: AlertName
expr: up == 0
- record: sum:up
expr: count(up == 1)
```

```yaml
# Rules nested under custom tree
service:
prometheus:
rules:
- alert: AlertName
expr: up == 0
- record: sum:up
expr: count(up == 1)
```

You can still check these rules using pint, but you need to switch pint YAML
parser into "relaxed" mode by adding this section to pint config file:

```js
parser {
relaxed = [ "my/files/*.yml" ]
}
```

See [parser](configuration.md#parser) documentation for more details.
"Relaxed" parser mode will load anything that can be parsed as Prometheus rule,
while "strict" parser mode will fail if it reads a file that wouldn't load
cleanly as Prometheus config file.

## Control comments

There is a number of comments you can add to your rule files in order to change
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/alerts_external_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (c AlertsExternalLabelsCheck) Check(ctx context.Context, _ string, rule par
return problems
}

cfg, err := c.prom.Config(ctx)
cfg, err := c.prom.Config(ctx, 0)
if err != nil {
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
problems = append(problems, Problem{
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/labels_conflict.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (c LabelsConflictCheck) Check(ctx context.Context, _ string, rule parser.Ru
return problems
}

cfg, err := c.prom.Config(ctx)
cfg, err := c.prom.Config(ctx, 0)
if err != nil {
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning)
problems = append(problems, Problem{
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/promql_rate.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (c RateCheck) Check(ctx context.Context, _ string, rule parser.Rule, entrie
return problems
}

cfg, err := c.prom.Config(ctx)
cfg, err := c.prom.Config(ctx, 0)
if err != nil {
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
problems = append(problems, Problem{
Expand Down
9 changes: 9 additions & 0 deletions internal/config/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,15 @@ func (pg *PrometheusGenerator) ServersForPath(path string) []*promapi.FailoverGr
return servers
}

func (pg *PrometheusGenerator) ServerWithName(name string) *promapi.FailoverGroup {
for _, server := range pg.servers {
if server.Name() == name {
return server
}
}
return nil
}

func (pg *PrometheusGenerator) addServer(server *promapi.FailoverGroup) error {
for _, s := range pg.servers {
if s.Name() == server.Name() {
Expand Down
14 changes: 10 additions & 4 deletions internal/promapi/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ type ConfigSectionGlobal struct {
}

type PrometheusConfig struct {

Check failure on line 25 in internal/promapi/config.go

View workflow job for this annotation

GitHub Actions / format

8 bytes saved: struct with 40 pointer bytes could be 32
Global ConfigSectionGlobal `yaml:"global"`
Global ConfigSectionGlobal `yaml:"global"`
RuleFiles []string `yaml:"rule_files"`
}

type ConfigResult struct {
Expand All @@ -36,6 +37,7 @@ type configQuery struct {
prom *Prometheus
ctx context.Context
timestamp time.Time
cacheTTL time.Duration
}

func (q configQuery) Run() queryResult {
Expand Down Expand Up @@ -80,19 +82,23 @@ func (q configQuery) CacheKey() uint64 {
}

func (q configQuery) CacheTTL() time.Duration {
return time.Minute * 10
return q.cacheTTL
}

func (p *Prometheus) Config(ctx context.Context) (*ConfigResult, error) {
func (p *Prometheus) Config(ctx context.Context, cacheTTL time.Duration) (*ConfigResult, error) {
slog.Debug("Scheduling Prometheus configuration query", slog.String("uri", p.safeURI))

key := "/api/v1/status/config"
p.locker.lock(key)
defer p.locker.unlock(key)

if cacheTTL == 0 {
cacheTTL = time.Minute
}

resultChan := make(chan queryResult)
p.queries <- queryRequest{
query: configQuery{prom: p, ctx: ctx, timestamp: time.Now()},
query: configQuery{prom: p, ctx: ctx, timestamp: time.Now(), cacheTTL: cacheTTL},
result: resultChan,
}

Expand Down
4 changes: 2 additions & 2 deletions internal/promapi/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func TestConfig(t *testing.T) {
prom.StartWorkers()
defer prom.Close()

cfg, err := prom.Config(context.Background())
cfg, err := prom.Config(context.Background(), time.Minute)
if tc.err != "" {
require.EqualError(t, err, tc.err, tc)
} else {
Expand Down Expand Up @@ -187,7 +187,7 @@ func TestConfigHeaders(t *testing.T) {
fg.StartWorkers(reg)
defer fg.Close(reg)

_, err := fg.Config(context.Background())
_, err := fg.Config(context.Background(), 0)
require.NoError(t, err)
})
}
Expand Down
Loading

0 comments on commit f798732

Please sign in to comment.