Skip to content

Commit 8e72a57

Browse files
Query shape for agg & sort (#44) (#77)
(cherry picked from commit b55d760) Signed-off-by: David Zane <davizane@amazon.com> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent aeeaa24 commit 8e72a57

File tree

4 files changed

+611
-21
lines changed

4 files changed

+611
-21
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.plugin.insights.core.service.categorizer;
10+
11+
import java.util.ArrayList;
12+
import java.util.Collection;
13+
import java.util.Collections;
14+
import java.util.List;
15+
import java.util.Map;
16+
import java.util.function.Function;
17+
import org.opensearch.core.common.io.stream.NamedWriteable;
18+
import org.opensearch.index.query.AbstractGeometryQueryBuilder;
19+
import org.opensearch.index.query.CommonTermsQueryBuilder;
20+
import org.opensearch.index.query.ExistsQueryBuilder;
21+
import org.opensearch.index.query.FieldMaskingSpanQueryBuilder;
22+
import org.opensearch.index.query.FuzzyQueryBuilder;
23+
import org.opensearch.index.query.GeoBoundingBoxQueryBuilder;
24+
import org.opensearch.index.query.GeoDistanceQueryBuilder;
25+
import org.opensearch.index.query.GeoPolygonQueryBuilder;
26+
import org.opensearch.index.query.MatchBoolPrefixQueryBuilder;
27+
import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder;
28+
import org.opensearch.index.query.MatchPhraseQueryBuilder;
29+
import org.opensearch.index.query.MatchQueryBuilder;
30+
import org.opensearch.index.query.MultiTermQueryBuilder;
31+
import org.opensearch.index.query.PrefixQueryBuilder;
32+
import org.opensearch.index.query.QueryBuilder;
33+
import org.opensearch.index.query.RangeQueryBuilder;
34+
import org.opensearch.index.query.RegexpQueryBuilder;
35+
import org.opensearch.index.query.SpanNearQueryBuilder;
36+
import org.opensearch.index.query.SpanTermQueryBuilder;
37+
import org.opensearch.index.query.TermQueryBuilder;
38+
import org.opensearch.index.query.TermsQueryBuilder;
39+
import org.opensearch.index.query.WildcardQueryBuilder;
40+
import org.opensearch.search.aggregations.AggregationBuilder;
41+
import org.opensearch.search.aggregations.AggregatorFactories;
42+
import org.opensearch.search.aggregations.PipelineAggregationBuilder;
43+
import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
44+
import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
45+
import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
46+
import org.opensearch.search.aggregations.bucket.histogram.VariableWidthHistogramAggregationBuilder;
47+
import org.opensearch.search.aggregations.bucket.missing.MissingAggregationBuilder;
48+
import org.opensearch.search.aggregations.bucket.range.AbstractRangeBuilder;
49+
import org.opensearch.search.aggregations.bucket.range.GeoDistanceAggregationBuilder;
50+
import org.opensearch.search.aggregations.bucket.range.IpRangeAggregationBuilder;
51+
import org.opensearch.search.aggregations.bucket.sampler.DiversifiedAggregationBuilder;
52+
import org.opensearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder;
53+
import org.opensearch.search.aggregations.bucket.terms.SignificantTermsAggregationBuilder;
54+
import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
55+
import org.opensearch.search.aggregations.metrics.AvgAggregationBuilder;
56+
import org.opensearch.search.aggregations.metrics.CardinalityAggregationBuilder;
57+
import org.opensearch.search.aggregations.metrics.ExtendedStatsAggregationBuilder;
58+
import org.opensearch.search.aggregations.metrics.GeoCentroidAggregationBuilder;
59+
import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder;
60+
import org.opensearch.search.aggregations.metrics.MinAggregationBuilder;
61+
import org.opensearch.search.aggregations.metrics.StatsAggregationBuilder;
62+
import org.opensearch.search.aggregations.metrics.SumAggregationBuilder;
63+
import org.opensearch.search.aggregations.metrics.ValueCountAggregationBuilder;
64+
import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder;
65+
import org.opensearch.search.builder.SearchSourceBuilder;
66+
import org.opensearch.search.sort.FieldSortBuilder;
67+
import org.opensearch.search.sort.SortBuilder;
68+
69+
/**
70+
* Class to generate query shape
71+
*/
72+
public class QueryShapeGenerator {
73+
// Returned by the shape builders when a section (query, aggregation, sort) is absent,
// and used as the root indent when pretty-printing the query tree.
static final String EMPTY_STRING = "";
74+
// Indentation unit; callers repeat it (2x / 4x) to build each nesting level.
static final String ONE_SPACE_INDENT = " ";
75+
// Field-data extractors for query builders, keyed by builder class and looked up with getClass().
static final Map<Class<?>, List<Function<Object, String>>> QUERY_FIELD_DATA_MAP = FieldDataMapHelper.getQueryFieldDataMap();
76+
// Field-data extractors for aggregation builders, keyed by builder class and looked up with getClass().
static final Map<Class<?>, List<Function<Object, String>>> AGG_FIELD_DATA_MAP = FieldDataMapHelper.getAggFieldDataMap();
77+
// Field-data extractors for sort builders, keyed by builder class and looked up with getClass().
static final Map<Class<?>, List<Function<Object, String>>> SORT_FIELD_DATA_MAP = FieldDataMapHelper.getSortFieldDataMap();
78+
79+
/**
80+
* Method to build search query shape given a source
81+
* @param source search request source
82+
* @param showFields whether to append field data
83+
* @return Search query shape as String
84+
*/
85+
public static String buildShape(SearchSourceBuilder source, Boolean showFields) {
86+
StringBuilder shape = new StringBuilder();
87+
shape.append(buildQueryShape(source.query(), showFields));
88+
shape.append(buildAggregationShape(source.aggregations(), showFields));
89+
shape.append(buildSortShape(source.sorts(), showFields));
90+
return shape.toString();
91+
}
92+
93+
/**
94+
* Method to build query-section shape
95+
* @param queryBuilder search request query builder
96+
* @param showFields whether to append field data
97+
* @return Query-section shape as String
98+
*/
99+
static String buildQueryShape(QueryBuilder queryBuilder, Boolean showFields) {
100+
if (queryBuilder == null) {
101+
return EMPTY_STRING;
102+
}
103+
QueryShapeVisitor shapeVisitor = new QueryShapeVisitor();
104+
queryBuilder.visit(shapeVisitor);
105+
return shapeVisitor.prettyPrintTree(EMPTY_STRING, showFields);
106+
}
107+
108+
/**
109+
* Method to build aggregation shape
110+
* @param aggregationsBuilder search request aggregation builder
111+
* @param showFields whether to append field data
112+
* @return Aggregation shape as String
113+
*/
114+
static String buildAggregationShape(AggregatorFactories.Builder aggregationsBuilder, Boolean showFields) {
115+
if (aggregationsBuilder == null) {
116+
return EMPTY_STRING;
117+
}
118+
StringBuilder aggregationShape = recursiveAggregationShapeBuilder(
119+
aggregationsBuilder.getAggregatorFactories(),
120+
aggregationsBuilder.getPipelineAggregatorFactories(),
121+
new StringBuilder(),
122+
new StringBuilder(),
123+
showFields
124+
);
125+
return aggregationShape.toString();
126+
}
127+
128+
static StringBuilder recursiveAggregationShapeBuilder(
129+
Collection<AggregationBuilder> aggregationBuilders,
130+
Collection<PipelineAggregationBuilder> pipelineAggregations,
131+
StringBuilder outputBuilder,
132+
StringBuilder baseIndent,
133+
Boolean showFields
134+
) {
135+
//// Normal Aggregations ////
136+
if (aggregationBuilders.isEmpty() == false) {
137+
outputBuilder.append(baseIndent).append("aggregation:").append("\n");
138+
}
139+
List<String> aggShapeStrings = new ArrayList<>();
140+
for (AggregationBuilder aggBuilder : aggregationBuilders) {
141+
StringBuilder stringBuilder = new StringBuilder();
142+
stringBuilder.append(baseIndent).append(ONE_SPACE_INDENT.repeat(2)).append(aggBuilder.getType());
143+
if (showFields) {
144+
stringBuilder.append(buildFieldDataString(AGG_FIELD_DATA_MAP.get(aggBuilder.getClass()), aggBuilder));
145+
}
146+
stringBuilder.append("\n");
147+
148+
if (aggBuilder.getSubAggregations().isEmpty() == false) {
149+
// Recursive call on sub-aggregations
150+
recursiveAggregationShapeBuilder(
151+
aggBuilder.getSubAggregations(),
152+
aggBuilder.getPipelineAggregations(),
153+
stringBuilder,
154+
baseIndent.append(ONE_SPACE_INDENT.repeat(4)),
155+
showFields
156+
);
157+
baseIndent.delete(0, 4);
158+
}
159+
aggShapeStrings.add(stringBuilder.toString());
160+
}
161+
162+
// Sort alphanumerically and append aggregations list
163+
Collections.sort(aggShapeStrings);
164+
for (String shapeString : aggShapeStrings) {
165+
outputBuilder.append(shapeString);
166+
}
167+
168+
//// Pipeline Aggregation (cannot have sub-aggregations) ////
169+
if (pipelineAggregations.isEmpty() == false) {
170+
outputBuilder.append(baseIndent).append(ONE_SPACE_INDENT.repeat(2)).append("pipeline aggregation:").append("\n");
171+
172+
List<String> pipelineAggShapeStrings = new ArrayList<>();
173+
for (PipelineAggregationBuilder pipelineAgg : pipelineAggregations) {
174+
pipelineAggShapeStrings.add(
175+
new StringBuilder().append(baseIndent)
176+
.append(ONE_SPACE_INDENT.repeat(4))
177+
.append(pipelineAgg.getType())
178+
.append("\n")
179+
.toString()
180+
);
181+
}
182+
183+
// Sort alphanumerically and append pipeline aggregations list
184+
Collections.sort(pipelineAggShapeStrings);
185+
for (String shapeString : pipelineAggShapeStrings) {
186+
outputBuilder.append(shapeString);
187+
}
188+
}
189+
return outputBuilder;
190+
}
191+
192+
/**
193+
* Method to build sort shape
194+
* @param sortBuilderList search request sort builders list
195+
* @param showFields whether to append field data
196+
* @return Sort shape as String
197+
*/
198+
static String buildSortShape(List<SortBuilder<?>> sortBuilderList, Boolean showFields) {
199+
if (sortBuilderList == null || sortBuilderList.isEmpty()) {
200+
return EMPTY_STRING;
201+
}
202+
StringBuilder sortShape = new StringBuilder();
203+
sortShape.append("sort:\n");
204+
205+
List<String> shapeStrings = new ArrayList<>();
206+
for (SortBuilder<?> sortBuilder : sortBuilderList) {
207+
StringBuilder stringBuilder = new StringBuilder();
208+
stringBuilder.append(ONE_SPACE_INDENT.repeat(2)).append(sortBuilder.order());
209+
if (showFields) {
210+
stringBuilder.append(buildFieldDataString(SORT_FIELD_DATA_MAP.get(sortBuilder.getClass()), sortBuilder));
211+
}
212+
shapeStrings.add(stringBuilder.toString());
213+
}
214+
215+
Collections.sort(shapeStrings);
216+
for (String line : shapeStrings) {
217+
sortShape.append(line).append("\n");
218+
}
219+
return sortShape.toString();
220+
}
221+
222+
/**
223+
* Method to build field data
224+
* @return String: comma separated list with leading space in square brackets
225+
* Ex: " [my_field, width:5]"
226+
*/
227+
static String buildFieldDataString(List<Function<Object, String>> methods, NamedWriteable builder) {
228+
List<String> fieldDataList = new ArrayList<>();
229+
if (methods != null) {
230+
for (Function<Object, String> lambda : methods) {
231+
fieldDataList.add(lambda.apply(builder));
232+
}
233+
}
234+
return " [" + String.join(", ", fieldDataList) + "]";
235+
}
236+
237+
/**
238+
* Helper class to create static field data maps
239+
*/
240+
private static class FieldDataMapHelper {
241+
242+
// Helper method to create map entries
243+
private static <T> Map.Entry<Class<?>, List<Function<Object, String>>> createEntry(Class<T> clazz, Function<T, String> extractor) {
244+
return Map.entry(clazz, List.of(obj -> extractor.apply(clazz.cast(obj))));
245+
}
246+
247+
/**
248+
* Returns a map where the keys are query builders, and the values are lists of
249+
* functions that extract field values from instances of these classes.
250+
*
251+
* @return a map with class types as keys and lists of field extraction functions as values.
252+
*/
253+
private static Map<Class<?>, List<Function<Object, String>>> getQueryFieldDataMap() {
254+
return Map.ofEntries(
255+
createEntry(AbstractGeometryQueryBuilder.class, AbstractGeometryQueryBuilder::fieldName),
256+
createEntry(CommonTermsQueryBuilder.class, CommonTermsQueryBuilder::fieldName),
257+
createEntry(ExistsQueryBuilder.class, ExistsQueryBuilder::fieldName),
258+
createEntry(FieldMaskingSpanQueryBuilder.class, FieldMaskingSpanQueryBuilder::fieldName),
259+
createEntry(FuzzyQueryBuilder.class, FuzzyQueryBuilder::fieldName),
260+
createEntry(GeoBoundingBoxQueryBuilder.class, GeoBoundingBoxQueryBuilder::fieldName),
261+
createEntry(GeoDistanceQueryBuilder.class, GeoDistanceQueryBuilder::fieldName),
262+
createEntry(GeoPolygonQueryBuilder.class, GeoPolygonQueryBuilder::fieldName),
263+
createEntry(MatchBoolPrefixQueryBuilder.class, MatchBoolPrefixQueryBuilder::fieldName),
264+
createEntry(MatchQueryBuilder.class, MatchQueryBuilder::fieldName),
265+
createEntry(MatchPhraseQueryBuilder.class, MatchPhraseQueryBuilder::fieldName),
266+
createEntry(MatchPhrasePrefixQueryBuilder.class, MatchPhrasePrefixQueryBuilder::fieldName),
267+
createEntry(MultiTermQueryBuilder.class, MultiTermQueryBuilder::fieldName),
268+
createEntry(PrefixQueryBuilder.class, PrefixQueryBuilder::fieldName),
269+
createEntry(RangeQueryBuilder.class, RangeQueryBuilder::fieldName),
270+
createEntry(RegexpQueryBuilder.class, RegexpQueryBuilder::fieldName),
271+
createEntry(SpanNearQueryBuilder.SpanGapQueryBuilder.class, SpanNearQueryBuilder.SpanGapQueryBuilder::fieldName),
272+
createEntry(SpanTermQueryBuilder.class, SpanTermQueryBuilder::fieldName),
273+
createEntry(TermQueryBuilder.class, TermQueryBuilder::fieldName),
274+
createEntry(TermsQueryBuilder.class, TermsQueryBuilder::fieldName),
275+
createEntry(WildcardQueryBuilder.class, WildcardQueryBuilder::fieldName)
276+
);
277+
}
278+
279+
/**
280+
* Returns a map where the keys are aggregation builders, and the values are lists of
281+
* functions that extract field values from instances of these classes.
282+
*
283+
* @return a map with class types as keys and lists of field extraction functions as values.
284+
*/
285+
private static Map<Class<?>, List<Function<Object, String>>> getAggFieldDataMap() {
286+
return Map.ofEntries(
287+
createEntry(IpRangeAggregationBuilder.class, IpRangeAggregationBuilder::field),
288+
createEntry(AutoDateHistogramAggregationBuilder.class, AutoDateHistogramAggregationBuilder::field),
289+
createEntry(DateHistogramAggregationBuilder.class, DateHistogramAggregationBuilder::field),
290+
createEntry(HistogramAggregationBuilder.class, HistogramAggregationBuilder::field),
291+
createEntry(VariableWidthHistogramAggregationBuilder.class, VariableWidthHistogramAggregationBuilder::field),
292+
createEntry(MissingAggregationBuilder.class, MissingAggregationBuilder::field),
293+
createEntry(AbstractRangeBuilder.class, AbstractRangeBuilder::field),
294+
createEntry(GeoDistanceAggregationBuilder.class, GeoDistanceAggregationBuilder::field),
295+
createEntry(DiversifiedAggregationBuilder.class, DiversifiedAggregationBuilder::field),
296+
createEntry(RareTermsAggregationBuilder.class, RareTermsAggregationBuilder::field),
297+
createEntry(SignificantTermsAggregationBuilder.class, SignificantTermsAggregationBuilder::field),
298+
createEntry(TermsAggregationBuilder.class, TermsAggregationBuilder::field),
299+
createEntry(AvgAggregationBuilder.class, AvgAggregationBuilder::field),
300+
createEntry(CardinalityAggregationBuilder.class, CardinalityAggregationBuilder::field),
301+
createEntry(ExtendedStatsAggregationBuilder.class, ExtendedStatsAggregationBuilder::field),
302+
createEntry(GeoCentroidAggregationBuilder.class, GeoCentroidAggregationBuilder::field),
303+
createEntry(MaxAggregationBuilder.class, MaxAggregationBuilder::field),
304+
createEntry(MinAggregationBuilder.class, MinAggregationBuilder::field),
305+
createEntry(StatsAggregationBuilder.class, StatsAggregationBuilder::field),
306+
createEntry(SumAggregationBuilder.class, SumAggregationBuilder::field),
307+
createEntry(ValueCountAggregationBuilder.class, ValueCountAggregationBuilder::field),
308+
createEntry(ValuesSourceAggregationBuilder.class, ValuesSourceAggregationBuilder::field)
309+
);
310+
}
311+
312+
/**
313+
* Returns a map where the keys are sort builders, and the values are lists of
314+
* functions that extract field values from instances of these classes.
315+
*
316+
* @return a map with class types as keys and lists of field extraction functions as values.
317+
*/
318+
private static Map<Class<?>, List<Function<Object, String>>> getSortFieldDataMap() {
319+
return Map.ofEntries(createEntry(FieldSortBuilder.class, FieldSortBuilder::getFieldName));
320+
}
321+
}
322+
}

src/main/java/org/opensearch/plugin/insights/core/service/categorizer/QueryShapeVisitor.java

+28-6
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,15 @@
88

99
package org.opensearch.plugin.insights.core.service.categorizer;
1010

11+
import static org.opensearch.plugin.insights.core.service.categorizer.QueryShapeGenerator.ONE_SPACE_INDENT;
12+
import static org.opensearch.plugin.insights.core.service.categorizer.QueryShapeGenerator.QUERY_FIELD_DATA_MAP;
13+
1114
import java.util.ArrayList;
1215
import java.util.EnumMap;
1316
import java.util.List;
1417
import java.util.Locale;
1518
import java.util.Map;
19+
import java.util.function.Function;
1620
import org.apache.lucene.search.BooleanClause;
1721
import org.opensearch.common.SetOnce;
1822
import org.opensearch.index.query.QueryBuilder;
@@ -23,11 +27,21 @@
2327
*/
2428
public final class QueryShapeVisitor implements QueryBuilderVisitor {
2529
private final SetOnce<String> queryType = new SetOnce<>();
30+
private final SetOnce<String> fieldData = new SetOnce<>();
2631
private final Map<BooleanClause.Occur, List<QueryShapeVisitor>> childVisitors = new EnumMap<>(BooleanClause.Occur.class);
2732

2833
@Override
29-
public void accept(QueryBuilder qb) {
30-
queryType.set(qb.getName());
34+
public void accept(QueryBuilder queryBuilder) {
35+
queryType.set(queryBuilder.getName());
36+
37+
List<String> fieldDataList = new ArrayList<>();
38+
List<Function<Object, String>> methods = QUERY_FIELD_DATA_MAP.get(queryBuilder.getClass());
39+
if (methods != null) {
40+
for (Function<Object, String> lambda : methods) {
41+
fieldDataList.add(lambda.apply(queryBuilder));
42+
}
43+
}
44+
fieldData.set(String.join(", ", fieldDataList));
3145
}
3246

3347
@Override
@@ -81,14 +95,22 @@ public String toJson() {
8195
/**
8296
* Pretty print the query builder tree
8397
* @param indent indent size
98+
* @param showFields whether to print field data
8499
* @return Query builder tree as a pretty string
85100
*/
86-
public String prettyPrintTree(String indent) {
87-
StringBuilder outputBuilder = new StringBuilder(indent).append(queryType.get()).append("\n");
101+
public String prettyPrintTree(String indent, Boolean showFields) {
102+
StringBuilder outputBuilder = new StringBuilder(indent).append(queryType.get());
103+
if (showFields) {
104+
outputBuilder.append(" [").append(fieldData.get()).append("]");
105+
}
106+
outputBuilder.append("\n");
88107
for (Map.Entry<BooleanClause.Occur, List<QueryShapeVisitor>> entry : childVisitors.entrySet()) {
89-
outputBuilder.append(indent).append(" ").append(entry.getKey().name().toLowerCase(Locale.ROOT)).append(":\n");
108+
outputBuilder.append(indent)
109+
.append(ONE_SPACE_INDENT.repeat(2))
110+
.append(entry.getKey().name().toLowerCase(Locale.ROOT))
111+
.append(":\n");
90112
for (QueryShapeVisitor child : entry.getValue()) {
91-
outputBuilder.append(child.prettyPrintTree(indent + " "));
113+
outputBuilder.append(child.prettyPrintTree(indent + ONE_SPACE_INDENT.repeat(4), showFields));
92114
}
93115
}
94116
return outputBuilder.toString();

0 commit comments

Comments
 (0)