-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmetrics-influxdb-arc
executable file
·96 lines (76 loc) · 2.24 KB
/
metrics-influxdb-arc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/python
import os
import sys
import time
# ARC job states
arex_job_states = [
    "ACCEPTED",
    "PREPARING",
    "SUBMIT",
    "INLRMS",
    "FINISHING",
    "FINISHED",
    "DELETED",
    "CANCELLING"
]


# Get numbers of jobs in each state
def getJobsStatesInfo(control_dir='/var/spool/arc/jobstatus'):
    """Count jobs per state and format one metric line per known state.

    The first line of every file found in the ``accepting``, ``finished``,
    ``processing`` and ``restarting`` sub-directories of *control_dir* is
    read as that job's state. One ``jobs,state=<STATE> value=<N>`` line is
    produced for each entry of ``arex_job_states`` (0 when no job is in
    that state); states not listed there are counted but not reported.

    Args:
        control_dir: Directory holding the job status sub-directories.

    Returns:
        The metric lines as a single newline-terminated string, or ``''``
        when one of the expected sub-directories does not exist.
    """
    control_subdirs = ['accepting', 'finished', 'processing', 'restarting']
    states = {}
    for control_subdir in control_subdirs:
        subdir = os.path.join(control_dir, control_subdir)
        if not os.path.isdir(subdir):
            # Give up on job metrics, but hand back a string so callers
            # that concatenate the result keep working.
            sys.stderr.write('Status directory %s does not exist\n' % subdir)
            return ''
        try:
            entries = os.listdir(subdir)
        except OSError as e:
            sys.stderr.write('Could not list status files in %s: %s\n'
                             % (subdir, e))
            continue
        for status_file in entries:
            try:
                # "with" closes the file even if reading fails.
                with open(os.path.join(subdir, status_file)) as f:
                    status = f.readline().strip()
            except (IOError, OSError) as e:
                # Errors go to stderr so stdout carries metric lines only.
                sys.stderr.write('Could not read status file %s: %s\n'
                                 % (status_file, e))
                continue
            states[status] = states.get(status, 0) + 1
    return ''.join(
        'jobs,state=%s value=%d\n' % (state, states.get(state, 0))
        for state in arex_job_states
    )
# Get number of jobs in the processing subdirectory
def getProcessingJobs(processing_dir='/var/spool/arc/jobstatus/processing'):
    """Format the number of entries in *processing_dir* as a metric line.

    Args:
        processing_dir: Directory whose entries are counted.

    Returns:
        ``'jobs,state=PROCESSING value=<N>\\n'``, or ``''`` when the
        directory cannot be listed (the error is reported on stderr).
    """
    try:
        entries = os.listdir(processing_dir)
    except OSError as e:
        # Format arguments must be a tuple; errors go to stderr so stdout
        # carries metric lines only.
        sys.stderr.write("Error listing dir %s: %s\n" % (processing_dir, e))
        return ''
    return 'jobs,state=PROCESSING value=%d\n' % len(entries)
# Get the time since the modification timestamp of the gm-heartbeat file
def getHeartBeatInfo(heartbeat='/var/spool/arc/jobstatus/gm-heartbeat'):
    """Format the age of the heartbeat file as a metric line.

    The age is the current time minus the file's modification time, in
    seconds.

    Args:
        heartbeat: Path of the heartbeat file to inspect.

    Returns:
        ``'arex_heartbeat_lastseen value=<seconds>\\n'``, or ``''`` when the
        file cannot be stat'ed (the error is reported on stderr).
    """
    try:
        mtime = os.stat(heartbeat).st_mtime
    except OSError as e:
        # Errors go to stderr so stdout carries metric lines only; the empty
        # string keeps callers that concatenate the result working.
        sys.stderr.write("Error with heartbeat file: %s\n" % e)
        return ''
    heartbeat_time = time.time() - mtime
    return 'arex_heartbeat_lastseen value=' + str(heartbeat_time) + '\n'
# Generate metrics
# A collector that fails yields no text (None or ''); "or ''" skips it so the
# remaining metrics are still printed instead of aborting with a TypeError.
data = ''
data += getJobsStatesInfo() or ''
data += getProcessingJobs() or ''
data += getHeartBeatInfo() or ''
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(data)