-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_puppet_nodesync.py
131 lines (102 loc) · 4.7 KB
/
check_puppet_nodesync.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3
# coding=utf-8
import argparse
import datetime
import logging
import nagiosplugin
import pypuppetdb
import re
# Module-level logger used for the debug output in PuppetNodeSync.probe.
# NOTE(review): it is registered under the name 'nagiosplugin', presumably so
# messages follow that library's logging/verbosity setup - confirm.
_log = logging.getLogger('nagiosplugin')
class ListContext(nagiosplugin.Context):
    """Context for metrics whose value is a list of host names.

    An empty list evaluates to Ok. A non-empty list evaluates to the state
    chosen at construction time, and the hosts are listed in the message.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        name,
        text,
        state=nagiosplugin.state.Critical,
        result_cls=nagiosplugin.result.Result,
    ):
        """Remember the wording and the state used for a non-empty list.

        Args:
            name: Context name, handed to the base class.
            text: Phrase appended to the result message (e.g. "failed").
            state: State returned when the metric value is not empty.
            result_cls: Result class, handed to the base class and used by
                ``evaluate`` to build its return value.
        """
        self.state = state
        self.text = text
        # The metric format string is deliberately empty.
        super().__init__(name, fmt_metric='', result_cls=result_cls)

    def evaluate(self, metric, resource):
        """Turn a list-valued metric into a result.

        Args:
            metric: Metric whose ``value`` is a sequence of host names.
            resource: Unused here; part of the context interface.

        Returns:
            A ``result_cls`` instance: Ok with "No hosts <text>" for an empty
            list, otherwise the configured state with the hosts comma-joined.
        """
        hosts = metric.value
        if len(hosts) > 0:
            joined = ",".join(hosts)
            return self.result_cls(self.state, "Hosts: %s %s" % (joined, self.text), metric)
        return self.result_cls(nagiosplugin.state.Ok, "No hosts %s" % self.text, metric)
class PuppetNodeSync(nagiosplugin.Resource):
    """Resource that sorts PuppetDB nodes by latest report status and report age."""

    def __init__(self, args):
        """Keep the parsed arguments and fix one UTC reference time for all age checks.

        Args:
            args: Parsed command-line namespace; ``probe`` reads ``db``,
                ``port``, ``timeout``, ``exclude`` and ``sync_time`` from it.
        """
        self.now = datetime.datetime.now(datetime.timezone.utc)
        self.args = args

    def check_in_sync(self, time, max_diff=60):
        """Tell whether ``time`` lies at most ``max_diff`` minutes before ``self.now``.

        Args:
            time: Timestamp to compare; it is subtracted from ``self.now``.
            max_diff: Largest accepted age in minutes.

        Returns:
            False when the age exceeds ``max_diff`` minutes, True otherwise.
        """
        # Dividing by a one-minute timedelta expresses the age in minutes.
        age_minutes = (self.now - time) / datetime.timedelta(minutes=1)
        return not age_minutes > max_diff

    def probe(self):
        """Query PuppetDB and yield node counts plus the failed and out-of-sync host lists.

        Every node is counted in ``nodes_total``. Nodes whose name matches the
        exclude pattern are only counted as ignored. For the remaining nodes
        the latest report is looked up and the node is sorted by report status
        and by the age of ``node.report_timestamp``.

        Yields:
            Seven count metrics (``nodes_*``) followed by the list-valued
            metrics ``failed`` and ``no_sync``.

        Raises:
            RuntimeError: If a report carries a status other than
                "unchanged", "failed" or "changed".
        """
        cfg = self.args
        _log.debug("Connecting to PuppetDB on %s:%s", cfg.db, cfg.port)
        client = pypuppetdb.connect(host=cfg.db, port=cfg.port, timeout=cfg.timeout)
        all_nodes = client.nodes()

        # One list of host names per category. "no_report" is filled below
        # but is not emitted as a metric.
        buckets = {
            key: []
            for key in (
                "total", "ignored", "in_sync", "no_sync",
                "no_report", "unchanged", "failed", "changed",
            )
        }
        exclude_re = re.compile(cfg.exclude) if cfg.exclude is not None else None

        for node in all_nodes:
            hostname = str(node)
            _log.debug("Processing node %s", hostname)
            buckets["total"].append(hostname)

            # match() anchors at the start of the name only.
            if exclude_re is not None and exclude_re.match(hostname):
                buckets["ignored"].append(hostname)
                continue

            _log.debug("Querying PuppetDB reports for node %s", hostname)
            # NOTE(review): _query is an underscore-prefixed pypuppetdb method.
            reports = client._query(
                endpoint="reports",
                query='["and",["=","certname","%s"],["=","latest_report?",true]]' % hostname,
            )
            if len(reports) != 1:
                _log.debug("Found no report for hostname %s", hostname)
                buckets["no_report"].append(hostname)
                continue

            status = reports[0]["status"]
            _log.debug("Found report for hostname %s with status: %s", hostname, status)
            if status not in ("unchanged", "failed", "changed"):
                raise RuntimeError("Unknown status %s" % status)
            buckets[status].append(hostname)

            if not self.check_in_sync(node.report_timestamp, cfg.sync_time):
                buckets["no_sync"].append(hostname)
                _log.debug("Hostname %s is out of sync", hostname)
            else:
                buckets["in_sync"].append(hostname)
                _log.debug("Hostname %s is in sync", hostname)

        # Counts first, in a fixed order, then the two host lists.
        count_metrics = (
            ('nodes_total', "total"),
            ('nodes_ignored', "ignored"),
            ('nodes_changed', "changed"),
            ('nodes_unchanged', "unchanged"),
            ('nodes_failed', "failed"),
            ('nodes_in_sync', "in_sync"),
            ('nodes_no_sync', "no_sync"),
        )
        for metric_name, key in count_metrics:
            yield nagiosplugin.Metric(metric_name, len(buckets[key]))
        for key in ('failed', 'no_sync'):
            yield nagiosplugin.Metric(key, buckets[key])
@nagiosplugin.guarded
def main():
    """Parse the command line, assemble the check with its contexts and run it."""
    parser = argparse.ArgumentParser()
    # (flags, keyword settings) for each option, in the order they are registered.
    options = (
        (("-d", "--db"), dict(help="PuppetDB Server name", default="localhost")),
        (("-p", "--port"), dict(help="PuppetDB Server port", type=int, default=8080)),
        (("-t", "--timeout"), dict(help="PuppetDB Server timeout", type=int, default=60)),
        (("-v", "--verbose"), dict(help="Verbosity on checks", action='count', default=0)),
        (("-e", "--exclude"), dict(help="Regular expression to exclude nodes", default=None)),
        (
            ("-s", "--sync-time"),
            dict(help="Time in minutes for nodes to be considered in sync", type=int, default=60),
        ),
    )
    for flags, settings in options:
        parser.add_argument(*flags, **settings)
    args = parser.parse_args()

    check = nagiosplugin.Check(PuppetNodeSync(args))

    # One scalar context per count metric yielded by PuppetNodeSync.probe.
    scalar_names = (
        'nodes_total',
        'nodes_ignored',
        'nodes_changed',
        'nodes_unchanged',
        'nodes_failed',
        'nodes_in_sync',
        'nodes_no_sync',
    )
    for scalar_name in scalar_names:
        check.add(nagiosplugin.ScalarContext(scalar_name))

    # List contexts: failed hosts are critical, out-of-sync hosts only warn.
    check.add(ListContext('failed', "failed", state=nagiosplugin.state.Critical))
    check.add(ListContext('no_sync', "out of sync", state=nagiosplugin.state.Warn))

    # The same timeout value is used for the PuppetDB connection and the check run.
    check.main(args.verbose, timeout=args.timeout)
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()