18
18
package org .compuscene .metrics .prometheus ;
19
19
20
20
import org .opensearch .action .ClusterStatsData ;
21
+ import org .opensearch .action .SnapshotsResponse ;
21
22
import org .opensearch .action .admin .cluster .health .ClusterHealthResponse ;
22
23
import org .opensearch .action .admin .cluster .node .stats .NodeStats ;
23
24
import org .opensearch .action .admin .indices .stats .CommonStats ;
37
38
import org .opensearch .monitor .os .OsStats ;
38
39
import org .opensearch .monitor .process .ProcessStats ;
39
40
import org .opensearch .script .ScriptStats ;
41
+ import org .opensearch .snapshots .SnapshotInfo ;
42
+ import org .opensearch .snapshots .SnapshotState ;
40
43
import org .opensearch .threadpool .ThreadPoolStats ;
41
44
import org .opensearch .transport .TransportStats ;
42
45
@@ -54,19 +57,23 @@ public class PrometheusMetricsCollector {
54
57
55
58
private boolean isPrometheusClusterSettings ;
56
59
private boolean isPrometheusIndices ;
60
+ private boolean isPrometheusSnapshots ;
57
61
private PrometheusMetricsCatalog catalog ;
58
62
59
63
/**
60
64
* A constructor.
61
65
* @param catalog {@link PrometheusMetricsCatalog}
62
66
* @param isPrometheusIndices boolean flag for index level metric
67
+ * @param isPrometheusSnapshots boolean flag for snapshots related metrics
63
68
* @param isPrometheusClusterSettings boolean flag cluster settings metrics
64
69
*/
65
70
public PrometheusMetricsCollector (PrometheusMetricsCatalog catalog ,
66
71
boolean isPrometheusIndices ,
72
+ boolean isPrometheusSnapshots ,
67
73
boolean isPrometheusClusterSettings ) {
68
74
this .isPrometheusClusterSettings = isPrometheusClusterSettings ;
69
75
this .isPrometheusIndices = isPrometheusIndices ;
76
+ this .isPrometheusSnapshots = isPrometheusSnapshots ;
70
77
this .catalog = catalog ;
71
78
}
72
79
@@ -80,6 +87,7 @@ public void registerMetrics() {
80
87
registerNodeMetrics ();
81
88
registerIndicesMetrics ();
82
89
registerPerIndexMetrics ();
90
+ registerSnapshotMetrics ();
83
91
registerTransportMetrics ();
84
92
registerHTTPMetrics ();
85
93
registerThreadPoolMetrics ();
@@ -465,6 +473,30 @@ private void updatePerIndexMetrics(@Nullable ClusterHealthResponse chr, @Nullabl
465
473
}
466
474
}
467
475
476
+ @ SuppressWarnings ("checkstyle:LineLength" )
477
+ private void registerSnapshotMetrics () {
478
+ catalog .registerClusterGauge ("min_snapshot_age" , "Time elapsed in milliseconds since the most recent successful snapshot's start time" , "sm_policy" );
479
+ }
480
+
481
+ private void updateSnapshotsMetrics (@ Nullable SnapshotsResponse snapshotsResponse ) {
482
+ if (snapshotsResponse == null ) {
483
+ return ;
484
+ }
485
+ Map <String , Long > smPolicyMinSnapshotAge = new HashMap <>();
486
+ for (SnapshotInfo snapshotInfo : snapshotsResponse .getSnapshotInfos ()) {
487
+ // emit min_snapshot_age metric only for successful snapshots
488
+ if (snapshotInfo .state () != SnapshotState .SUCCESS ) {
489
+ continue ;
490
+ }
491
+ String smPolicy = snapshotInfo .userMetadata () == null ? "adhoc" : snapshotInfo .userMetadata ().getOrDefault ("sm_policy" , "adhoc" ).toString ();
492
+ long snapshotAge = System .currentTimeMillis () - snapshotInfo .startTime ();
493
+ smPolicyMinSnapshotAge .compute (smPolicy , (key , oldValue ) -> oldValue == null ? snapshotAge : Math .min (oldValue , snapshotAge ));
494
+ }
495
+ for (Map .Entry <String , Long > entry : smPolicyMinSnapshotAge .entrySet ()) {
496
+ catalog .setClusterGauge ("min_snapshot_age" , entry .getValue (), entry .getKey ());
497
+ }
498
+ }
499
+
468
500
@ SuppressWarnings ("checkstyle:LineLength" )
469
501
private void updatePerIndexContextMetrics (String indexName , String context , CommonStats idx ) {
470
502
catalog .setClusterGauge ("index_doc_number" , idx .getDocs ().getCount (), indexName , context );
@@ -920,12 +952,14 @@ private void updateESSettings(@Nullable ClusterStatsData stats) {
920
952
* @param nodeStats NodeStats filtered using nodes filter
921
953
* @param indicesStats IndicesStatsResponse
922
954
* @param clusterStatsData ClusterStatsData
955
+ * @param snapshotsResponse SnapshotsResponse
923
956
*/
924
957
public void updateMetrics (String originNodeName , String originNodeId ,
925
958
@ Nullable ClusterHealthResponse clusterHealthResponse ,
926
959
NodeStats [] nodeStats ,
927
960
@ Nullable IndicesStatsResponse indicesStats ,
928
- @ Nullable ClusterStatsData clusterStatsData ) {
961
+ @ Nullable ClusterStatsData clusterStatsData ,
962
+ @ Nullable SnapshotsResponse snapshotsResponse ) {
929
963
Summary .Timer timer = catalog .startSummaryTimer (
930
964
new Tuple <>(originNodeName , originNodeId ),
931
965
"metrics_generate_time_seconds" );
@@ -956,7 +990,9 @@ public void updateMetrics(String originNodeName, String originNodeId,
956
990
if (isPrometheusClusterSettings ) {
957
991
updateESSettings (clusterStatsData );
958
992
}
959
-
993
+ if (isPrometheusSnapshots ) {
994
+ updateSnapshotsMetrics (snapshotsResponse );
995
+ }
960
996
timer .observeDuration ();
961
997
}
962
998
0 commit comments