Skip to content

Commit 65e4489

Browse files
authored
Query grouping framework for Top N queries and group by query similarity (#66)
* Query grouping framework and group by query similarity Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Spotless apply Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Build fix Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Properly configure settings update consumer Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Address review comments Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Refactor unit tests Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Decouple Measurement and MetricType Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Aggregate type NONE will ensure no aggregations computed Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Perform renaming Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Integrate query shape library with grouping Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Spotless Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Create and consume string hashcode interface Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Health checks in code Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Fix tests and spotless apply Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Minor fixes Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Max groups setting and unit tests Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Address review comments Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Address review comments Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Create query grouper interface and top query store interface Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Address review comments Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Removed unused interface Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Rebase main and spotless Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Renaming variable Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Remove TopQueriesStore interface 
Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Drain top queries service on group change Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Rename max groups setting and allow minimum 0 Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Make write/read from io backward compatible Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Minor fix Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> * Refactor query grouper Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com> --------- Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com>
1 parent 3c561e0 commit 65e4489

23 files changed

+1808
-54
lines changed

src/main/java/org/opensearch/plugin/insights/QueryInsightsPlugin.java

+2
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ public List<Setting<?>> getSettings() {
130130
QueryInsightsSettings.TOP_N_MEMORY_QUERIES_SIZE,
131131
QueryInsightsSettings.TOP_N_MEMORY_QUERIES_WINDOW_SIZE,
132132
QueryInsightsSettings.TOP_N_MEMORY_EXPORTER_SETTINGS,
133+
QueryInsightsSettings.TOP_N_QUERIES_GROUP_BY,
134+
QueryInsightsSettings.TOP_N_QUERIES_MAX_GROUPS_EXCLUDING_N,
133135
QueryCategorizationSettings.SEARCH_QUERY_METRICS_ENABLED_SETTING
134136
);
135137
}

src/main/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListener.java

+36-4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
package org.opensearch.plugin.insights.core.listener;
1010

1111
import static org.opensearch.plugin.insights.settings.QueryCategorizationSettings.SEARCH_QUERY_METRICS_ENABLED_SETTING;
12+
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_QUERIES_GROUP_BY;
13+
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_QUERIES_MAX_GROUPS_EXCLUDING_N;
1214
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getTopNEnabledSetting;
1315
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getTopNSizeSetting;
1416
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getTopNWindowSizeSetting;
@@ -31,7 +33,9 @@
3133
import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo;
3234
import org.opensearch.core.xcontent.ToXContent;
3335
import org.opensearch.plugin.insights.core.service.QueryInsightsService;
36+
import org.opensearch.plugin.insights.core.service.categorizer.QueryShapeGenerator;
3437
import org.opensearch.plugin.insights.rules.model.Attribute;
38+
import org.opensearch.plugin.insights.rules.model.Measurement;
3539
import org.opensearch.plugin.insights.rules.model.MetricType;
3640
import org.opensearch.plugin.insights.rules.model.SearchQueryRecord;
3741
import org.opensearch.tasks.Task;
@@ -101,6 +105,26 @@ public QueryInsightsListener(
101105
this.queryInsightsService.setWindowSize(type, clusterService.getClusterSettings().get(getTopNWindowSizeSetting(type)));
102106
}
103107

108+
// Settings endpoints set for grouping top n queries
109+
clusterService.getClusterSettings()
110+
.addSettingsUpdateConsumer(
111+
TOP_N_QUERIES_GROUP_BY,
112+
v -> this.queryInsightsService.setGrouping(v),
113+
v -> this.queryInsightsService.validateGrouping(v)
114+
);
115+
this.queryInsightsService.validateGrouping(clusterService.getClusterSettings().get(TOP_N_QUERIES_GROUP_BY));
116+
this.queryInsightsService.setGrouping(clusterService.getClusterSettings().get(TOP_N_QUERIES_GROUP_BY));
117+
118+
clusterService.getClusterSettings()
119+
.addSettingsUpdateConsumer(
120+
TOP_N_QUERIES_MAX_GROUPS_EXCLUDING_N,
121+
v -> this.queryInsightsService.setMaximumGroups(v),
122+
v -> this.queryInsightsService.validateMaximumGroups(v)
123+
);
124+
this.queryInsightsService.validateMaximumGroups(clusterService.getClusterSettings().get(TOP_N_QUERIES_MAX_GROUPS_EXCLUDING_N));
125+
this.queryInsightsService.setMaximumGroups(clusterService.getClusterSettings().get(TOP_N_QUERIES_MAX_GROUPS_EXCLUDING_N));
126+
127+
// Settings endpoints set for search query metrics
104128
clusterService.getClusterSettings()
105129
.addSettingsUpdateConsumer(SEARCH_QUERY_METRICS_ENABLED_SETTING, v -> setSearchQueryMetricsEnabled(v));
106130
setSearchQueryMetricsEnabled(clusterService.getClusterSettings().get(SEARCH_QUERY_METRICS_ENABLED_SETTING));
@@ -191,32 +215,40 @@ private void constructSearchQueryRecord(final SearchPhaseContext context, final
191215

192216
final SearchRequest request = context.getRequest();
193217
try {
194-
Map<MetricType, Number> measurements = new HashMap<>();
218+
Map<MetricType, Measurement> measurements = new HashMap<>();
195219
if (shouldCollect(MetricType.LATENCY)) {
196220
measurements.put(
197221
MetricType.LATENCY,
198-
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - searchRequestContext.getAbsoluteStartNanos())
222+
new Measurement(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - searchRequestContext.getAbsoluteStartNanos()))
199223
);
200224
}
201225
if (shouldCollect(MetricType.CPU)) {
202226
measurements.put(
203227
MetricType.CPU,
204-
tasksResourceUsages.stream().map(a -> a.getTaskResourceUsage().getCpuTimeInNanos()).mapToLong(Long::longValue).sum()
228+
new Measurement(
229+
tasksResourceUsages.stream().map(a -> a.getTaskResourceUsage().getCpuTimeInNanos()).mapToLong(Long::longValue).sum()
230+
)
205231
);
206232
}
207233
if (shouldCollect(MetricType.MEMORY)) {
208234
measurements.put(
209235
MetricType.MEMORY,
210-
tasksResourceUsages.stream().map(a -> a.getTaskResourceUsage().getMemoryInBytes()).mapToLong(Long::longValue).sum()
236+
new Measurement(
237+
tasksResourceUsages.stream().map(a -> a.getTaskResourceUsage().getMemoryInBytes()).mapToLong(Long::longValue).sum()
238+
)
211239
);
212240
}
241+
242+
String hashcode = QueryShapeGenerator.getShapeHashCodeAsString(request.source(), false);
243+
213244
Map<Attribute, Object> attributes = new HashMap<>();
214245
attributes.put(Attribute.SEARCH_TYPE, request.searchType().toString().toLowerCase(Locale.ROOT));
215246
attributes.put(Attribute.SOURCE, request.source());
216247
attributes.put(Attribute.TOTAL_SHARDS, context.getNumShards());
217248
attributes.put(Attribute.INDICES, request.indices());
218249
attributes.put(Attribute.PHASE_LATENCY_MAP, searchRequestContext.phaseTookMap());
219250
attributes.put(Attribute.TASK_RESOURCE_USAGES, tasksResourceUsages);
251+
attributes.put(Attribute.QUERY_HASHCODE, hashcode);
220252

221253
Map<String, Object> labels = new HashMap<>();
222254
// Retrieve user provided label if exists

src/main/java/org/opensearch/plugin/insights/core/service/QueryInsightsService.java

+80-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
package org.opensearch.plugin.insights.core.service;
1010

11+
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_GROUPING_TYPE;
1112
import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getExporterSettings;
1213

1314
import java.io.IOException;
@@ -27,6 +28,7 @@
2728
import org.opensearch.common.unit.TimeValue;
2829
import org.opensearch.plugin.insights.core.exporter.QueryInsightsExporterFactory;
2930
import org.opensearch.plugin.insights.core.service.categorizer.SearchQueryCategorizer;
31+
import org.opensearch.plugin.insights.rules.model.GroupingType;
3032
import org.opensearch.plugin.insights.rules.model.MetricType;
3133
import org.opensearch.plugin.insights.rules.model.SearchQueryRecord;
3234
import org.opensearch.plugin.insights.settings.QueryInsightsSettings;
@@ -73,6 +75,11 @@ public class QueryInsightsService extends AbstractLifecycleComponent {
7375
*/
7476
final QueryInsightsExporterFactory queryInsightsExporterFactory;
7577

78+
/**
79+
* The grouping type applied to the top N queries of every metric type
80+
*/
81+
private GroupingType groupingType;
82+
7683
private volatile boolean searchQueryMetricsEnabled;
7784

7885
private SearchQueryCategorizer searchQueryCategorizer;
@@ -112,16 +119,17 @@ public QueryInsightsService(
112119

113120
this.searchQueryCategorizer = SearchQueryCategorizer.getInstance(metricsRegistry);
114121
this.enableSearchQueryMetricsFeature(false);
122+
this.groupingType = DEFAULT_GROUPING_TYPE;
115123
}
116124

117125
/**
118126
* Ingest the query data into in-memory stores
119127
*
120128
* @param record the record to ingest
121-
* @return SearchQueryRecord
129+
* @return true/false
122130
*/
123131
public boolean addRecord(final SearchQueryRecord record) {
124-
boolean shouldAdd = searchQueryMetricsEnabled;
132+
boolean shouldAdd = isSearchQueryMetricsFeatureEnabled() || isGroupingEnabled();
125133
if (!shouldAdd) {
126134
for (Map.Entry<MetricType, TopQueriesService> entry : topQueriesServices.entrySet()) {
127135
if (!enableCollect.get(entry.getKey())) {
@@ -185,6 +193,67 @@ public void enableCollection(final MetricType metricType, final boolean enable)
185193
this.topQueriesServices.get(metricType).setEnabled(enable);
186194
}
187195

196+
/**
197+
* Validate grouping given grouping type setting
198+
* @param groupingTypeSetting grouping setting
199+
*/
200+
public void validateGrouping(final String groupingTypeSetting) {
201+
GroupingType.getGroupingTypeFromSettingAndValidate(groupingTypeSetting);
202+
}
203+
204+
/**
205+
* Set grouping
206+
* @param groupingTypeSetting grouping
207+
*/
208+
public void setGrouping(final String groupingTypeSetting) {
209+
GroupingType newGroupingType = GroupingType.getGroupingTypeFromSettingAndValidate(groupingTypeSetting);
210+
GroupingType oldGroupingType = groupingType;
211+
212+
if (oldGroupingType != newGroupingType) {
213+
groupingType = newGroupingType;
214+
215+
for (MetricType metricType : MetricType.allMetricTypes()) {
216+
this.topQueriesServices.get(metricType).setGrouping(newGroupingType);
217+
}
218+
}
219+
}
220+
221+
/**
222+
* Set max number of groups
223+
* @param maxGroups maximum number of groups that should be tracked when calculating Top N groups
224+
*/
225+
public void setMaximumGroups(final int maxGroups) {
226+
for (MetricType metricType : MetricType.allMetricTypes()) {
227+
this.topQueriesServices.get(metricType).setMaxGroups(maxGroups);
228+
}
229+
}
230+
231+
/**
232+
* Validate max number of groups. Should be between 0 and MAX_GROUPS_EXCLUDING_TOPN_LIMIT (inclusive)
233+
* @param maxGroups maximum number of groups that should be tracked when calculating Top N groups
234+
*/
235+
public void validateMaximumGroups(final int maxGroups) {
236+
if (maxGroups < 0 || maxGroups > QueryInsightsSettings.MAX_GROUPS_EXCLUDING_TOPN_LIMIT) {
237+
throw new IllegalArgumentException(
238+
"Max groups setting"
239+
+ " should be between 0 and "
240+
+ QueryInsightsSettings.MAX_GROUPS_EXCLUDING_TOPN_LIMIT
241+
+ ", was ("
242+
+ maxGroups
243+
+ ")"
244+
);
245+
}
246+
}
247+
248+
/**
249+
* Get the currently configured grouping type
250+
* @return GroupingType
251+
*/
252+
253+
public GroupingType getGrouping() {
254+
return groupingType;
255+
}
256+
188257
/**
189258
* Get if the Query Insights data collection is enabled for a MetricType
190259
*
@@ -226,9 +295,18 @@ public boolean isSearchQueryMetricsFeatureEnabled() {
226295
return this.searchQueryMetricsEnabled;
227296
}
228297

298+
/**
299+
* Check whether grouping is active, i.e. the grouping type is not NONE and the top N feature is enabled
300+
* @return boolean
301+
*/
302+
public boolean isGroupingEnabled() {
303+
return this.groupingType != GroupingType.NONE && isTopNFeatureEnabled();
304+
}
305+
229306
/**
230307
* Enable/Disable search query metrics feature.
231308
* @param enable enable/disable search query metrics feature
309+
* Stops query insights service if no features enabled
232310
*/
233311
public void enableSearchQueryMetricsFeature(boolean enable) {
234312
searchQueryMetricsEnabled = enable;

src/main/java/org/opensearch/plugin/insights/core/service/TopQueriesService.java

+53-7
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import java.util.Collection;
2525
import java.util.List;
2626
import java.util.Locale;
27-
import java.util.PriorityQueue;
27+
import java.util.concurrent.PriorityBlockingQueue;
2828
import java.util.concurrent.atomic.AtomicReference;
2929
import java.util.stream.Collectors;
3030
import java.util.stream.Stream;
@@ -35,6 +35,10 @@
3535
import org.opensearch.plugin.insights.core.exporter.QueryInsightsExporter;
3636
import org.opensearch.plugin.insights.core.exporter.QueryInsightsExporterFactory;
3737
import org.opensearch.plugin.insights.core.exporter.SinkType;
38+
import org.opensearch.plugin.insights.core.service.grouper.MinMaxHeapQueryGrouper;
39+
import org.opensearch.plugin.insights.core.service.grouper.QueryGrouper;
40+
import org.opensearch.plugin.insights.rules.model.AggregationType;
41+
import org.opensearch.plugin.insights.rules.model.GroupingType;
3842
import org.opensearch.plugin.insights.rules.model.MetricType;
3943
import org.opensearch.plugin.insights.rules.model.SearchQueryRecord;
4044
import org.opensearch.plugin.insights.settings.QueryInsightsSettings;
@@ -66,7 +70,7 @@ public class TopQueriesService {
6670
/**
6771
* The internal thread-safe store that holds the top n queries insight data
6872
*/
69-
private final PriorityQueue<SearchQueryRecord> topQueriesStore;
73+
private final PriorityBlockingQueue<SearchQueryRecord> topQueriesStore;
7074

7175
/**
7276
* The AtomicReference of a snapshot of the current window top queries for getters to consume
@@ -93,6 +97,8 @@ public class TopQueriesService {
9397
*/
9498
private QueryInsightsExporter exporter;
9599

100+
private QueryGrouper queryGrouper;
101+
96102
TopQueriesService(
97103
final MetricType metricType,
98104
final ThreadPool threadPool,
@@ -106,9 +112,16 @@ public class TopQueriesService {
106112
this.windowSize = QueryInsightsSettings.DEFAULT_WINDOW_SIZE;
107113
this.windowStart = -1L;
108114
this.exporter = null;
109-
topQueriesStore = new PriorityQueue<>(topNSize, (a, b) -> SearchQueryRecord.compare(a, b, metricType));
115+
topQueriesStore = new PriorityBlockingQueue<>(topNSize, (a, b) -> SearchQueryRecord.compare(a, b, metricType));
110116
topQueriesCurrentSnapshot = new AtomicReference<>(new ArrayList<>());
111117
topQueriesHistorySnapshot = new AtomicReference<>(new ArrayList<>());
118+
queryGrouper = new MinMaxHeapQueryGrouper(
119+
metricType,
120+
QueryInsightsSettings.DEFAULT_GROUPING_TYPE,
121+
AggregationType.AVERAGE,
122+
topQueriesStore,
123+
topNSize
124+
);
112125
}
113126

114127
/**
@@ -118,6 +131,7 @@ public class TopQueriesService {
118131
*/
119132
public void setTopNSize(final int topNSize) {
120133
this.topNSize = topNSize;
134+
this.queryGrouper.updateTopNSize(topNSize);
121135
}
122136

123137
/**
@@ -169,6 +183,20 @@ public void setWindowSize(final TimeValue windowSize) {
169183
this.windowStart = -1L;
170184
}
171185

186+
public void setGrouping(final GroupingType groupingType) {
187+
boolean changed = queryGrouper.setGroupingType(groupingType);
188+
if (changed) {
189+
drain();
190+
}
191+
}
192+
193+
public void setMaxGroups(final int maxGroups) {
194+
boolean changed = queryGrouper.setMaxGroups(maxGroups);
195+
if (changed) {
196+
drain();
197+
}
198+
}
199+
172200
/**
173201
* Validate if the window size is valid, based on internal constrains.
174202
*
@@ -306,10 +334,16 @@ void consumeRecords(final List<SearchQueryRecord> records) {
306334
}
307335

308336
private void addToTopNStore(final List<SearchQueryRecord> records) {
309-
topQueriesStore.addAll(records);
310-
// remove top elements for fix sizing priority queue
311-
while (topQueriesStore.size() > topNSize) {
312-
topQueriesStore.poll();
337+
if (queryGrouper.getGroupingType() != GroupingType.NONE) {
338+
for (SearchQueryRecord record : records) {
339+
queryGrouper.add(record);
340+
}
341+
} else {
342+
topQueriesStore.addAll(records);
343+
// remove top elements for fix sizing priority queue
344+
while (topQueriesStore.size() > topNSize) {
345+
topQueriesStore.poll();
346+
}
313347
}
314348
}
315349

@@ -329,6 +363,9 @@ private void rotateWindowIfNecessary(final long newWindowStart) {
329363
}
330364
topQueriesHistorySnapshot.set(history);
331365
topQueriesStore.clear();
366+
if (queryGrouper.getGroupingType() != GroupingType.NONE) {
367+
queryGrouper.drain();
368+
}
332369
topQueriesCurrentSnapshot.set(new ArrayList<>());
333370
windowStart = newWindowStart;
334371
// export to the configured sink
@@ -368,4 +405,13 @@ public List<SearchQueryRecord> getTopQueriesCurrentSnapshot() {
368405
public void close() throws IOException {
369406
queryInsightsExporterFactory.closeExporter(this.exporter);
370407
}
408+
409+
/**
410+
* Drain internal stores.
411+
*/
412+
private void drain() {
413+
topQueriesStore.clear();
414+
topQueriesHistorySnapshot.set(new ArrayList<>());
415+
topQueriesCurrentSnapshot.set(new ArrayList<>());
416+
}
371417
}

src/main/java/org/opensearch/plugin/insights/core/service/categorizer/QueryShapeGenerator.java

+6
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,12 @@ public static MurmurHash3.Hash128 getShapeHashCode(SearchSourceBuilder source, B
9090
return MurmurHash3.hash128(shapeBytes.bytes, 0, shapeBytes.length, 0, new MurmurHash3.Hash128());
9191
}
9292

93+
public static String getShapeHashCodeAsString(SearchSourceBuilder source, Boolean showFields) {
94+
MurmurHash3.Hash128 hashcode = getShapeHashCode(source, showFields);
95+
String hashAsString = Long.toHexString(hashcode.h1) + Long.toHexString(hashcode.h2);
96+
return hashAsString;
97+
}
98+
9399
/**
94100
* Method to build search query shape given a source
95101
* @param source search request source

0 commit comments

Comments
 (0)