Skip to content

Commit 1db344e

Browse files
Top N indices auto deletion config & functionality (opensearch-project#172)
Signed-off-by: David Zane <davizane@amazon.com>
(cherry picked from commit 90ead0c)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 500d0e2 commit 1db344e

12 files changed: 444 additions and 40 deletions.

src/main/java/org/opensearch/plugin/insights/QueryInsightsPlugin.java

+2-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ public Collection<Object> createComponents(
8282
OperationalMetricsCounter.initialize(clusterService.getClusterName().toString(), metricsRegistry);
8383
// create top n queries service
8484
final QueryInsightsService queryInsightsService = new QueryInsightsService(
85-
clusterService.getClusterSettings(),
85+
clusterService,
8686
threadPool,
8787
client,
8888
metricsRegistry,
@@ -145,6 +145,7 @@ public List<Setting<?>> getSettings() {
145145
QueryInsightsSettings.TOP_N_QUERIES_GROUPING_FIELD_NAME,
146146
QueryInsightsSettings.TOP_N_QUERIES_GROUPING_FIELD_TYPE,
147147
QueryCategorizationSettings.SEARCH_QUERY_METRICS_ENABLED_SETTING,
148+
QueryInsightsSettings.TOP_N_EXPORTER_DELETE_AFTER,
148149
QueryCategorizationSettings.SEARCH_QUERY_FIELD_TYPE_CACHE_SIZE_KEY
149150
);
150151
}

src/main/java/org/opensearch/plugin/insights/core/exporter/LocalIndexExporter.java

+62-8
Original file line number | Diff line number | Diff line change
@@ -8,22 +8,31 @@
88

99
package org.opensearch.plugin.insights.core.exporter;
1010

11+
import static org.opensearch.plugin.insights.core.service.TopQueriesService.isTopQueriesIndex;
12+
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_DELETE_AFTER_VALUE;
13+
14+
import java.time.Instant;
1115
import java.time.ZoneOffset;
1216
import java.time.ZonedDateTime;
1317
import java.time.format.DateTimeFormatter;
1418
import java.util.List;
19+
import java.util.Locale;
20+
import java.util.Map;
21+
import java.util.concurrent.TimeUnit;
1522
import org.apache.logging.log4j.LogManager;
1623
import org.apache.logging.log4j.Logger;
1724
import org.opensearch.action.bulk.BulkRequestBuilder;
1825
import org.opensearch.action.bulk.BulkResponse;
1926
import org.opensearch.action.index.IndexRequest;
2027
import org.opensearch.client.Client;
28+
import org.opensearch.cluster.metadata.IndexMetadata;
2129
import org.opensearch.common.unit.TimeValue;
2230
import org.opensearch.common.xcontent.XContentFactory;
2331
import org.opensearch.core.action.ActionListener;
2432
import org.opensearch.core.xcontent.ToXContent;
2533
import org.opensearch.plugin.insights.core.metrics.OperationalMetric;
2634
import org.opensearch.plugin.insights.core.metrics.OperationalMetricsCounter;
35+
import org.opensearch.plugin.insights.core.service.TopQueriesService;
2736
import org.opensearch.plugin.insights.rules.model.SearchQueryRecord;
2837

2938
/**
@@ -36,6 +45,7 @@ public final class LocalIndexExporter implements QueryInsightsExporter {
3645
private final Logger logger = LogManager.getLogger();
3746
private final Client client;
3847
private DateTimeFormatter indexPattern;
48+
private int deleteAfter;
3949

4050
/**
4151
* Constructor of LocalIndexExporter
@@ -46,6 +56,7 @@ public final class LocalIndexExporter implements QueryInsightsExporter {
4656
public LocalIndexExporter(final Client client, final DateTimeFormatter indexPattern) {
4757
this.indexPattern = indexPattern;
4858
this.client = client;
59+
this.deleteAfter = DEFAULT_DELETE_AFTER_VALUE;
4960
}
5061

5162
/**
@@ -61,11 +72,9 @@ public DateTimeFormatter getIndexPattern() {
6172
* Setter of indexPattern
6273
*
6374
* @param indexPattern index pattern
64-
* @return the current LocalIndexExporter
6575
*/
66-
public LocalIndexExporter setIndexPattern(DateTimeFormatter indexPattern) {
76+
void setIndexPattern(DateTimeFormatter indexPattern) {
6777
this.indexPattern = indexPattern;
68-
return this;
6978
}
7079

7180
/**
@@ -75,15 +84,15 @@ public LocalIndexExporter setIndexPattern(DateTimeFormatter indexPattern) {
7584
*/
7685
@Override
7786
public void export(final List<SearchQueryRecord> records) {
78-
if (records == null || records.size() == 0) {
87+
if (records == null || records.isEmpty()) {
7988
return;
8089
}
8190
try {
82-
final String index = getDateTimeFromFormat();
91+
final String indexName = buildLocalIndexName();
8392
final BulkRequestBuilder bulkRequestBuilder = client.prepareBulk().setTimeout(TimeValue.timeValueMinutes(1));
8493
for (SearchQueryRecord record : records) {
8594
bulkRequestBuilder.add(
86-
new IndexRequest(index).source(record.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS))
95+
new IndexRequest(indexName).source(record.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS))
8796
);
8897
}
8998
bulkRequestBuilder.execute(new ActionListener<BulkResponse>() {
@@ -110,7 +119,52 @@ public void close() {
110119
logger.debug("Closing the LocalIndexExporter..");
111120
}
112121

113-
private String getDateTimeFromFormat() {
114-
return indexPattern.format(ZonedDateTime.now(ZoneOffset.UTC));
122+
/**
123+
* Builds the local index name using the current UTC datetime
124+
*
125+
* @return A string representing the index name in the format "top_queries-YYYY.MM.dd-01234".
126+
*/
127+
String buildLocalIndexName() {
128+
return indexPattern.format(ZonedDateTime.now(ZoneOffset.UTC)) + "-" + generateLocalIndexDateHash();
129+
}
130+
131+
/**
132+
* Set local index exporter data retention period
133+
*
134+
* @param deleteAfter the number of days after which Top N local indices should be deleted
135+
*/
136+
public void setDeleteAfter(final int deleteAfter) {
137+
this.deleteAfter = deleteAfter;
138+
}
139+
140+
/**
141+
* Delete Top N local indices older than the configured data retention period
142+
*
143+
* @param indexMetadataMap Map of index name {@link String} to {@link IndexMetadata}
144+
*/
145+
public void deleteExpiredTopNIndices(final Map<String, IndexMetadata> indexMetadataMap) {
146+
long expirationMillisLong = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(deleteAfter);
147+
for (Map.Entry<String, IndexMetadata> entry : indexMetadataMap.entrySet()) {
148+
String indexName = entry.getKey();
149+
if (isTopQueriesIndex(indexName) && entry.getValue().getCreationDate() <= expirationMillisLong) {
150+
// delete this index
151+
TopQueriesService.deleteSingleIndex(indexName, client);
152+
}
153+
}
154+
}
155+
156+
/**
157+
* Generates a consistent 5-digit numeric hash based on the current UTC date.
158+
* The generated hash is deterministic, meaning it will return the same result for the same date.
159+
*
160+
* @return A 5-digit numeric string representation of the current date's hash.
161+
*/
162+
public static String generateLocalIndexDateHash() {
163+
// Get the current date in UTC (yyyy-MM-dd format)
164+
String currentDate = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ROOT)
165+
.format(Instant.now().atOffset(ZoneOffset.UTC).toLocalDate());
166+
167+
// Generate a 5-digit numeric hash from the date's hashCode
168+
return String.format(Locale.ROOT, "%05d", (currentDate.hashCode() % 100000 + 100000) % 100000);
115169
}
116170
}

src/main/java/org/opensearch/plugin/insights/core/metrics/OperationalMetric.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,9 @@
1313
public enum OperationalMetric {
1414
LOCAL_INDEX_READER_PARSING_EXCEPTIONS("Number of errors when parsing with LocalIndexReader"),
1515
LOCAL_INDEX_EXPORTER_BULK_FAILURES("Number of failures when ingesting Query Insights data to local indices"),
16+
LOCAL_INDEX_EXPORTER_DELETE_FAILURES("Number of failures when deleting local indices"),
1617
LOCAL_INDEX_EXPORTER_EXCEPTIONS("Number of exceptions in Query Insights LocalIndexExporter"),
1718
INVALID_EXPORTER_TYPE_FAILURES("Number of invalid exporter type failures"),
18-
INVALID_INDEX_PATTERN_EXCEPTIONS("Number of invalid index pattern exceptions"),
1919
DATA_INGEST_EXCEPTIONS("Number of exceptions during data ingest in Query Insights"),
2020
QUERY_CATEGORIZE_EXCEPTIONS("Number of exceptions when categorizing the queries"),
2121
EXPORTER_FAIL_TO_CLOSE_EXCEPTION("Number of failures when closing the exporter"),

src/main/java/org/opensearch/plugin/insights/core/reader/LocalIndexReader.java

+6-4
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88

99
package org.opensearch.plugin.insights.core.reader;
1010

11+
import static org.opensearch.plugin.insights.core.exporter.LocalIndexExporter.generateLocalIndexDateHash;
12+
1113
import java.time.ZoneOffset;
1214
import java.time.ZonedDateTime;
1315
import java.time.format.DateTimeFormatter;
@@ -99,8 +101,8 @@ public List<SearchQueryRecord> read(final String from, final String to) {
99101
}
100102
ZonedDateTime curr = start;
101103
while (curr.isBefore(end.plusDays(1).toLocalDate().atStartOfDay(end.getZone()))) {
102-
String index = getDateTimeFromFormat(curr);
103-
SearchRequest searchRequest = new SearchRequest(index);
104+
String indexName = buildLocalIndexName(curr);
105+
SearchRequest searchRequest = new SearchRequest(indexName);
104106
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
105107
MatchQueryBuilder excludeQuery = QueryBuilders.matchQuery("indices", "top_queries*");
106108
RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("timestamp")
@@ -135,7 +137,7 @@ public void close() {
135137
logger.debug("Closing the LocalIndexReader..");
136138
}
137139

138-
private String getDateTimeFromFormat(ZonedDateTime current) {
139-
return current.format(indexPattern);
140+
private String buildLocalIndexName(ZonedDateTime current) {
141+
return current.format(indexPattern) + "-" + generateLocalIndexDateHash();
140142
}
141143
}

src/main/java/org/opensearch/plugin/insights/core/service/QueryInsightsService.java

+69-20
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_GROUPING_TYPE;
1212
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.QUERY_INSIGHTS_EXECUTOR;
13+
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_EXPORTER_DELETE_AFTER;
1314
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getExporterSettings;
1415

1516
import java.io.IOException;
@@ -19,13 +20,15 @@
1920
import java.util.List;
2021
import java.util.Map;
2122
import java.util.concurrent.LinkedBlockingQueue;
23+
import java.util.concurrent.TimeUnit;
2224
import java.util.stream.Collectors;
2325
import org.apache.logging.log4j.LogManager;
2426
import org.apache.logging.log4j.Logger;
2527
import org.opensearch.client.Client;
28+
import org.opensearch.cluster.metadata.IndexMetadata;
29+
import org.opensearch.cluster.service.ClusterService;
2630
import org.opensearch.common.inject.Inject;
2731
import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
28-
import org.opensearch.common.settings.ClusterSettings;
2932
import org.opensearch.common.settings.Settings;
3033
import org.opensearch.common.unit.TimeValue;
3134
import org.opensearch.core.xcontent.NamedXContentRegistry;
@@ -52,13 +55,15 @@ public class QueryInsightsService extends AbstractLifecycleComponent {
5255

5356
private static final Logger logger = LogManager.getLogger(QueryInsightsService.class);
5457

58+
private final ClusterService clusterService;
59+
5560
/**
5661
* The internal OpenSearch thread pool that executes async processing and exporting tasks
5762
*/
5863
private final ThreadPool threadPool;
5964

6065
/**
61-
* Services to capture top n queries for different metric types
66+
* Map of {@link MetricType} to associated {@link TopQueriesService}
6267
*/
6368
private final Map<MetricType, TopQueriesService> topQueriesServices;
6469

@@ -73,10 +78,10 @@ public class QueryInsightsService extends AbstractLifecycleComponent {
7378
private final LinkedBlockingQueue<SearchQueryRecord> queryRecordsQueue;
7479

7580
/**
76-
* Holds a reference to delayed operation {@link Scheduler.Cancellable} so it can be cancelled when
81+
* List of references to delayed operations {@link Scheduler.Cancellable} so they can be cancelled when
7782
* the service is closed concurrently.
7883
*/
79-
protected volatile Scheduler.Cancellable scheduledFuture;
84+
protected volatile List<Scheduler.Cancellable> scheduledFutures;
8085

8186
/**
8287
* Query Insights exporter factory
@@ -102,20 +107,21 @@ public class QueryInsightsService extends AbstractLifecycleComponent {
102107
/**
103108
* Constructor of the QueryInsightsService
104109
*
105-
* @param clusterSettings OpenSearch cluster level settings
110+
* @param clusterService OpenSearch cluster service
106111
* @param threadPool The OpenSearch thread pool to run async tasks
107112
* @param client OpenSearch client
108113
* @param metricsRegistry OpenTelemetry metrics registry
109114
* @param namedXContentRegistry NamedXContentRegistry for parsing purposes
110115
*/
111116
@Inject
112117
public QueryInsightsService(
113-
final ClusterSettings clusterSettings,
118+
final ClusterService clusterService,
114119
final ThreadPool threadPool,
115120
final Client client,
116121
final MetricsRegistry metricsRegistry,
117122
final NamedXContentRegistry namedXContentRegistry
118123
) {
124+
this.clusterService = clusterService;
119125
enableCollect = new HashMap<>();
120126
queryRecordsQueue = new LinkedBlockingQueue<>(QueryInsightsSettings.QUERY_RECORD_QUEUE_CAPACITY);
121127
this.threadPool = threadPool;
@@ -128,15 +134,22 @@ public QueryInsightsService(
128134
enableCollect.put(metricType, false);
129135
topQueriesServices.put(
130136
metricType,
131-
new TopQueriesService(metricType, threadPool, queryInsightsExporterFactory, queryInsightsReaderFactory)
137+
new TopQueriesService(client, metricType, threadPool, queryInsightsExporterFactory, queryInsightsReaderFactory)
132138
);
133139
}
134140
for (MetricType type : MetricType.allMetricTypes()) {
135-
clusterSettings.addSettingsUpdateConsumer(
136-
getExporterSettings(type),
137-
(settings -> setExporterAndReader(type, settings)),
138-
(settings -> validateExporterAndReaderConfig(type, settings))
139-
);
141+
clusterService.getClusterSettings()
142+
.addSettingsUpdateConsumer(
143+
getExporterSettings(type),
144+
(settings -> setExporterAndReader(type, settings, clusterService.state().metadata().indices())),
145+
(settings -> validateExporterAndReaderConfig(type, settings))
146+
);
147+
clusterService.getClusterSettings()
148+
.addSettingsUpdateConsumer(
149+
TOP_N_EXPORTER_DELETE_AFTER,
150+
(settings -> setExporterDeleteAfterAndDelete(type, settings)),
151+
(TopQueriesService::validateExporterDeleteAfter)
152+
);
140153
}
141154

142155
this.searchQueryCategorizer = SearchQueryCategorizer.getInstance(metricsRegistry);
@@ -389,14 +402,27 @@ public void setTopNSize(final MetricType type, final int topNSize) {
389402
* @param type {@link MetricType}
390403
* @param settings exporter and reader settings
391404
*/
392-
public void setExporterAndReader(final MetricType type, final Settings settings) {
405+
private void setExporterAndReader(final MetricType type, final Settings settings, final Map<String, IndexMetadata> indexMetadataMap) {
393406
if (topQueriesServices.containsKey(type)) {
394407
TopQueriesService tqs = topQueriesServices.get(type);
395-
tqs.setExporter(settings);
408+
tqs.setExporter(settings, indexMetadataMap);
396409
tqs.setReader(settings, namedXContentRegistry);
397410
}
398411
}
399412

413+
/**
414+
* Set the exporter delete after, then delete expired Top N indices
415+
*
416+
* @param type {@link MetricType}
417+
* @param deleteAfter the number of days after which Top N local indices should be deleted
418+
*/
419+
private void setExporterDeleteAfterAndDelete(final MetricType type, final int deleteAfter) {
420+
if (topQueriesServices.containsKey(type)) {
421+
topQueriesServices.get(type).setExporterDeleteAfter(deleteAfter);
422+
deleteExpiredTopNIndices();
423+
}
424+
}
425+
400426
/**
401427
* Get search query categorizer object
402428
* @return SearchQueryCategorizer object
@@ -421,18 +447,32 @@ public void validateExporterAndReaderConfig(final MetricType type, final Setting
421447
@Override
422448
protected void doStart() {
423449
if (isAnyFeatureEnabled()) {
424-
scheduledFuture = threadPool.scheduleWithFixedDelay(
425-
this::drainRecords,
426-
QueryInsightsSettings.QUERY_RECORD_QUEUE_DRAIN_INTERVAL,
427-
QueryInsightsSettings.QUERY_INSIGHTS_EXECUTOR
450+
scheduledFutures = new ArrayList<>();
451+
scheduledFutures.add(
452+
threadPool.scheduleWithFixedDelay(
453+
this::drainRecords,
454+
QueryInsightsSettings.QUERY_RECORD_QUEUE_DRAIN_INTERVAL,
455+
QueryInsightsSettings.QUERY_INSIGHTS_EXECUTOR
456+
)
457+
);
458+
scheduledFutures.add(
459+
threadPool.scheduleWithFixedDelay(
460+
this::deleteExpiredTopNIndices,
461+
new TimeValue(1, TimeUnit.DAYS), // Check for deletable indices once per day
462+
QueryInsightsSettings.QUERY_INSIGHTS_EXECUTOR
463+
)
428464
);
429465
}
430466
}
431467

432468
@Override
433469
protected void doStop() {
434-
if (scheduledFuture != null) {
435-
scheduledFuture.cancel();
470+
if (scheduledFutures != null) {
471+
for (Scheduler.Cancellable cancellable : scheduledFutures) {
472+
if (cancellable != null) {
473+
cancellable.cancel();
474+
}
475+
}
436476
}
437477
}
438478

@@ -462,4 +502,13 @@ public QueryInsightsHealthStats getHealthStats() {
462502
topQueriesHealthStatsMap
463503
);
464504
}
505+
506+
/**
507+
* Delete Top N local indices older than the configured data retention period
508+
*/
509+
private void deleteExpiredTopNIndices() {
510+
for (MetricType metricType : MetricType.allMetricTypes()) {
511+
topQueriesServices.get(metricType).deleteExpiredTopNIndices(clusterService.state().metadata().indices());
512+
}
513+
}
465514
}

0 commit comments

Comments
 (0)