Skip to content

Commit 818d97f

Browse files
psmitjlukas-vlcek
authored andcommitted
Added support for snapshot related metrics
Closes: #295 Closes: #165 Note: This commit is a result of squashing the following three commits: * 2ac18e9 * a3a4058 * 03bd45f Co-authored-by: Smit Patel <psmit@uber.com> Signed-off-by: Lukáš Vlček <lukas.vlcek@aiven.io>
1 parent 8efef9a commit 818d97f

File tree

10 files changed

+306
-14
lines changed

10 files changed

+306
-14
lines changed

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,13 @@ To disable exporting cluster settings use:
152152
prometheus.cluster.settings: false
153153
```
154154

155+
#### Snapshot metrics
156+
157+
By default, snapshot metrics are disabled. To enable exporting snapshot metrics use:
158+
```
159+
prometheus.snapshots: true
160+
```
161+
155162
#### Nodes filter
156163

157164
Metrics include statistics about individual OpenSearch nodes.

build.gradle

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import org.opensearch.gradle.PropertyNormalization
12
import org.opensearch.gradle.test.RestIntegTestTask
23

34
import java.util.concurrent.Callable
@@ -113,7 +114,7 @@ dependencies {
113114

114115
restResources {
115116
restApi {
116-
includeCore '_common', 'cat', 'cluster', 'nodes', 'indices', 'index'
117+
includeCore '_common', 'cat', 'cluster', 'nodes', 'indices', 'index', 'snapshot'
117118
}
118119
}
119120

@@ -139,8 +140,13 @@ tasks.named("check").configure { dependsOn(integTest) }
139140
// Temporary disable task :testingConventions
140141
testingConventions.enabled = false
141142

143+
// Directory for snapshot repository
144+
File repositoryDir = new File(project.layout.buildDirectory.get().asFile, "shared-repository")
145+
142146
testClusters.all {
143147
numberOfNodes = 2
148+
// Configuring repo path for 'fs' type snapshot repository
149+
setting 'path.repo', "${repositoryDir.absolutePath}", PropertyNormalization.IGNORE_VALUE
144150

145151
// It seems cluster name can not be customized here. It gives an error:
146152
// Testclusters does not allow the following settings to be changed:[cluster.name] for node{::yamlRestTest-0}

src/main/java/org/compuscene/metrics/prometheus/PrometheusMetricsCollector.java

+38-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.compuscene.metrics.prometheus;
1919

2020
import org.opensearch.action.ClusterStatsData;
21+
import org.opensearch.action.SnapshotsResponse;
2122
import org.opensearch.action.admin.cluster.health.ClusterHealthResponse;
2223
import org.opensearch.action.admin.cluster.node.stats.NodeStats;
2324
import org.opensearch.action.admin.indices.stats.CommonStats;
@@ -37,6 +38,8 @@
3738
import org.opensearch.monitor.os.OsStats;
3839
import org.opensearch.monitor.process.ProcessStats;
3940
import org.opensearch.script.ScriptStats;
41+
import org.opensearch.snapshots.SnapshotInfo;
42+
import org.opensearch.snapshots.SnapshotState;
4043
import org.opensearch.threadpool.ThreadPoolStats;
4144
import org.opensearch.transport.TransportStats;
4245

@@ -54,19 +57,23 @@ public class PrometheusMetricsCollector {
5457

5558
private boolean isPrometheusClusterSettings;
5659
private boolean isPrometheusIndices;
60+
private boolean isPrometheusSnapshots;
5761
private PrometheusMetricsCatalog catalog;
5862

5963
/**
 * Creates a collector bound to the given metrics catalog.
 *
 * @param catalog {@link PrometheusMetricsCatalog} the metrics are registered in and written to
 * @param isPrometheusIndices whether index level metrics are enabled
 * @param isPrometheusSnapshots whether snapshot related metrics are enabled
 * @param isPrometheusClusterSettings whether cluster settings metrics are enabled
 */
public PrometheusMetricsCollector(PrometheusMetricsCatalog catalog,
                                  boolean isPrometheusIndices,
                                  boolean isPrometheusSnapshots,
                                  boolean isPrometheusClusterSettings) {
    // Fields are assigned in the same order as the parameters are declared.
    this.catalog = catalog;
    this.isPrometheusIndices = isPrometheusIndices;
    this.isPrometheusSnapshots = isPrometheusSnapshots;
    this.isPrometheusClusterSettings = isPrometheusClusterSettings;
}
7279

@@ -80,6 +87,7 @@ public void registerMetrics() {
8087
registerNodeMetrics();
8188
registerIndicesMetrics();
8289
registerPerIndexMetrics();
90+
registerSnapshotMetrics();
8391
registerTransportMetrics();
8492
registerHTTPMetrics();
8593
registerThreadPoolMetrics();
@@ -465,6 +473,30 @@ private void updatePerIndexMetrics(@Nullable ClusterHealthResponse chr, @Nullabl
465473
}
466474
}
467475

476+
@SuppressWarnings("checkstyle:LineLength")
477+
private void registerSnapshotMetrics() {
478+
catalog.registerClusterGauge("min_snapshot_age", "Time elapsed in milliseconds since the most recent successful snapshot's start time", "sm_policy");
479+
}
480+
481+
private void updateSnapshotsMetrics(@Nullable SnapshotsResponse snapshotsResponse) {
482+
if (snapshotsResponse == null) {
483+
return;
484+
}
485+
Map<String, Long> smPolicyMinSnapshotAge = new HashMap<>();
486+
for (SnapshotInfo snapshotInfo : snapshotsResponse.getSnapshotInfos()) {
487+
// emit min_snapshot_age metric only for successful snapshots
488+
if (snapshotInfo.state() != SnapshotState.SUCCESS) {
489+
continue;
490+
}
491+
String smPolicy = snapshotInfo.userMetadata() == null ? "adhoc" : snapshotInfo.userMetadata().getOrDefault("sm_policy", "adhoc").toString();
492+
long snapshotAge = System.currentTimeMillis() - snapshotInfo.startTime();
493+
smPolicyMinSnapshotAge.compute(smPolicy, (key, oldValue) -> oldValue == null ? snapshotAge : Math.min(oldValue, snapshotAge));
494+
}
495+
for(Map.Entry<String, Long> entry : smPolicyMinSnapshotAge.entrySet()) {
496+
catalog.setClusterGauge("min_snapshot_age", entry.getValue(), entry.getKey());
497+
}
498+
}
499+
468500
@SuppressWarnings("checkstyle:LineLength")
469501
private void updatePerIndexContextMetrics(String indexName, String context, CommonStats idx) {
470502
catalog.setClusterGauge("index_doc_number", idx.getDocs().getCount(), indexName, context);
@@ -920,12 +952,14 @@ private void updateESSettings(@Nullable ClusterStatsData stats) {
920952
* @param nodeStats NodeStats filtered using nodes filter
921953
* @param indicesStats IndicesStatsResponse
922954
* @param clusterStatsData ClusterStatsData
955+
* @param snapshotsResponse SnapshotsResponse
923956
*/
924957
public void updateMetrics(String originNodeName, String originNodeId,
925958
@Nullable ClusterHealthResponse clusterHealthResponse,
926959
NodeStats[] nodeStats,
927960
@Nullable IndicesStatsResponse indicesStats,
928-
@Nullable ClusterStatsData clusterStatsData) {
961+
@Nullable ClusterStatsData clusterStatsData,
962+
@Nullable SnapshotsResponse snapshotsResponse) {
929963
Summary.Timer timer = catalog.startSummaryTimer(
930964
new Tuple<>(originNodeName, originNodeId),
931965
"metrics_generate_time_seconds");
@@ -956,7 +990,9 @@ public void updateMetrics(String originNodeName, String originNodeId,
956990
if (isPrometheusClusterSettings) {
957991
updateESSettings(clusterStatsData);
958992
}
959-
993+
if (isPrometheusSnapshots) {
994+
updateSnapshotsMetrics(snapshotsResponse);
995+
}
960996
timer.observeDuration();
961997
}
962998

src/main/java/org/compuscene/metrics/prometheus/PrometheusSettings.java

+26-2
Original file line numberDiff line numberDiff line change
@@ -49,26 +49,35 @@ public enum INDEX_FILTER_OPTIONS {
4949

5050
static String PROMETHEUS_CLUSTER_SETTINGS_KEY = "prometheus.cluster.settings";
5151
static String PROMETHEUS_INDICES_KEY = "prometheus.indices";
52+
static String PROMETHEUS_SNAPSHOTS_KEY = "prometheus.snapshots";
5253
static String PROMETHEUS_NODES_FILTER_KEY = "prometheus.nodes.filter";
5354
static String PROMETHEUS_SELECTED_INDICES_KEY = "prometheus.indices_filter.selected_indices";
5455
static String PROMETHEUS_SELECTED_OPTION_KEY = "prometheus.indices_filter.selected_option";
5556

5657
/**
57-
* This setting is used configure weather to expose cluster settings metrics or not. The default value is true.
58+
* This setting is used to configure whether to expose cluster settings metrics or not. The default value is true.
5859
* Can be configured in the opensearch.yml file or updated dynamically under key {@link #PROMETHEUS_CLUSTER_SETTINGS_KEY}.
5960
*/
6061
public static final Setting<Boolean> PROMETHEUS_CLUSTER_SETTINGS =
6162
Setting.boolSetting(PROMETHEUS_CLUSTER_SETTINGS_KEY, true,
6263
Setting.Property.Dynamic, Setting.Property.NodeScope);
6364

6465
/**
65-
* This setting is used configure weather to expose low level index metrics or not. The default value is true.
66+
* This setting is used to configure whether to expose low level index metrics or not. The default value is true.
6667
* Can be configured in the opensearch.yml file or updated dynamically under key {@link #PROMETHEUS_INDICES_KEY}.
6768
*/
6869
public static final Setting<Boolean> PROMETHEUS_INDICES =
6970
Setting.boolSetting(PROMETHEUS_INDICES_KEY, true,
7071
Setting.Property.Dynamic, Setting.Property.NodeScope);
7172

73+
/**
74+
* This setting is used to configure whether to expose snapshot metrics or not. The default value is false.
75+
* Can be configured in the opensearch.yml file or updated dynamically under key {@link #PROMETHEUS_SNAPSHOTS_KEY}.
76+
*/
77+
public static final Setting<Boolean> PROMETHEUS_SNAPSHOTS =
78+
Setting.boolSetting(PROMETHEUS_SNAPSHOTS_KEY, false,
79+
Setting.Property.Dynamic, Setting.Property.NodeScope);
80+
7281
/**
7382
* This setting is used to configure which cluster nodes to gather metrics from. The default value is _local.
7483
* Can be configured in the opensearch.yml file or updated dynamically under key {@link #PROMETHEUS_NODES_FILTER_KEY}.
@@ -97,6 +106,7 @@ public enum INDEX_FILTER_OPTIONS {
97106

98107
private volatile boolean clusterSettings;
99108
private volatile boolean indices;
109+
private volatile boolean snapshots;
100110
private volatile String nodesFilter;
101111
private volatile String selectedIndices;
102112
private volatile INDEX_FILTER_OPTIONS selectedOption;
@@ -109,11 +119,13 @@ public enum INDEX_FILTER_OPTIONS {
109119
public PrometheusSettings(Settings settings, ClusterSettings clusterSettings) {
110120
setPrometheusClusterSettings(PROMETHEUS_CLUSTER_SETTINGS.get(settings));
111121
setPrometheusIndices(PROMETHEUS_INDICES.get(settings));
122+
setPrometheusSnapshots(PROMETHEUS_SNAPSHOTS.get(settings));
112123
setPrometheusNodesFilter(PROMETHEUS_NODES_FILTER.get(settings));
113124
setPrometheusSelectedIndices(PROMETHEUS_SELECTED_INDICES.get(settings));
114125
setPrometheusSelectedOption(PROMETHEUS_SELECTED_OPTION.get(settings));
115126
clusterSettings.addSettingsUpdateConsumer(PROMETHEUS_CLUSTER_SETTINGS, this::setPrometheusClusterSettings);
116127
clusterSettings.addSettingsUpdateConsumer(PROMETHEUS_INDICES, this::setPrometheusIndices);
128+
clusterSettings.addSettingsUpdateConsumer(PROMETHEUS_SNAPSHOTS, this::setPrometheusSnapshots);
117129
clusterSettings.addSettingsUpdateConsumer(PROMETHEUS_NODES_FILTER, this::setPrometheusNodesFilter);
118130
clusterSettings.addSettingsUpdateConsumer(PROMETHEUS_SELECTED_INDICES, this::setPrometheusSelectedIndices);
119131
clusterSettings.addSettingsUpdateConsumer(PROMETHEUS_SELECTED_OPTION, this::setPrometheusSelectedOption);
@@ -127,6 +139,10 @@ private void setPrometheusIndices(boolean flag) {
127139
this.indices = flag;
128140
}
129141

142+
private void setPrometheusSnapshots(boolean flag) {
143+
this.snapshots = flag;
144+
}
145+
130146
private void setPrometheusNodesFilter(String filter) { this.nodesFilter = filter; }
131147

132148
private void setPrometheusSelectedIndices(String selectedIndices) {
@@ -153,6 +169,14 @@ public boolean getPrometheusIndices() {
153169
return this.indices;
154170
}
155171

172+
/**
173+
* Get value of settings key {@link #PROMETHEUS_SNAPSHOTS_KEY}.
174+
* @return boolean value of the key
175+
*/
176+
public boolean getPrometheusSnapshots() {
177+
return this.snapshots;
178+
}
179+
156180
/**
157181
* Get value of settings key {@link #PROMETHEUS_NODES_FILTER_KEY}.
158182
* @return boolean value of the key

src/main/java/org/opensearch/action/NodePrometheusMetricsResponse.java

+23
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.opensearch.action;
1919

20+
import org.opensearch.Version;
2021
import org.opensearch.action.admin.cluster.health.ClusterHealthResponse;
2122
import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse;
2223
import org.opensearch.action.admin.cluster.node.stats.NodeStats;
@@ -29,6 +30,7 @@
2930
import org.opensearch.core.common.io.stream.StreamOutput;
3031
import org.opensearch.common.settings.ClusterSettings;
3132
import org.opensearch.common.settings.Settings;
33+
import org.opensearch.search.pipeline.SearchPipelineStats;
3234

3335
import java.io.IOException;
3436

@@ -43,6 +45,7 @@ public class NodePrometheusMetricsResponse extends ActionResponse {
4345
private final NodeStats[] nodeStats;
4446
@Nullable private final IndicesStatsResponse indicesStats;
4547
private ClusterStatsData clusterStatsData = null;
48+
@Nullable private final SnapshotsResponse snapshotsResponse;
4649

4750
/**
4851
* A constructor that materialize the instance from inputStream.
@@ -56,6 +59,11 @@ public NodePrometheusMetricsResponse(StreamInput in) throws IOException {
5659
nodeStats = in.readArray(NodeStats::new, NodeStats[]::new);
5760
indicesStats = PackageAccessHelper.createIndicesStatsResponse(in);
5861
clusterStatsData = new ClusterStatsData(in);
62+
if (in.getVersion().onOrAfter(Version.V_2_17_1)) {
63+
snapshotsResponse = new SnapshotsResponse(in);
64+
} else {
65+
snapshotsResponse = null;
66+
}
5967
}
6068

6169
/**
@@ -65,6 +73,7 @@ public NodePrometheusMetricsResponse(StreamInput in) throws IOException {
6573
* @param nodesStats NodesStats
6674
* @param indicesStats IndicesStats
6775
* @param clusterStateResponse ClusterStateResponse
76+
* @param snapshotsResponse SnapshotsResponse
6877
* @param settings Settings
6978
* @param clusterSettings ClusterSettings
7079
*/
@@ -73,6 +82,7 @@ public NodePrometheusMetricsResponse(ClusterHealthResponse clusterHealth,
7382
NodeStats[] nodesStats,
7483
@Nullable IndicesStatsResponse indicesStats,
7584
@Nullable ClusterStateResponse clusterStateResponse,
85+
@Nullable SnapshotsResponse snapshotsResponse,
7686
Settings settings,
7787
ClusterSettings clusterSettings) {
7888
this.clusterHealth = clusterHealth;
@@ -82,6 +92,7 @@ public NodePrometheusMetricsResponse(ClusterHealthResponse clusterHealth,
8292
if (clusterStateResponse != null) {
8393
this.clusterStatsData = new ClusterStatsData(clusterStateResponse, settings, clusterSettings);
8494
}
95+
this.snapshotsResponse = snapshotsResponse;
8596
}
8697

8798
/**
@@ -106,6 +117,15 @@ public NodeStats[] getNodeStats() {
106117
return this.nodeStats;
107118
}
108119

120+
/**
121+
* Get internal {@link SnapshotsResponse} object.
122+
* @return SnapshotsResponse object
123+
*/
124+
@Nullable
125+
public SnapshotsResponse getSnapshotsResponse() {
126+
return this.snapshotsResponse;
127+
}
128+
109129
/**
110130
* Get internal {@link IndicesStatsResponse} object.
111131
* @return IndicesStatsResponse object
@@ -131,5 +151,8 @@ public void writeTo(StreamOutput out) throws IOException {
131151
out.writeArray(nodeStats);
132152
out.writeOptionalWriteable(indicesStats);
133153
clusterStatsData.writeTo(out);
154+
// The snapshots part is only put on the wire for targets on or after 2.17.1.
// NOTE(review): snapshotsResponse is declared @Nullable and is assigned straight from a @Nullable
// constructor argument, yet it is dereferenced here unconditionally - a null value would throw a
// NullPointerException. The stream constructor reads it unconditionally for the same versions, so
// a fix has to change reader and writer together (e.g. an optional-writeable pair) - TODO confirm.
if (out.getVersion().onOrAfter(Version.V_2_17_1)) {
155+
snapshotsResponse.writeTo(out);
156+
}
134157
}
135158
}

0 commit comments

Comments
 (0)