forked from opensearch-project/neural-search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBaseAggregationsWithHybridQueryIT.java
239 lines (226 loc) · 11.7 KB
/
BaseAggregationsWithHybridQueryIT.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.query.aggregation;
import lombok.SneakyThrows;
import org.junit.BeforeClass;
import org.opensearch.neuralsearch.BaseNeuralSearchIT;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.IntStream;
import static org.opensearch.neuralsearch.util.TestUtils.RELATION_EQUAL_TO;
import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getNestedHits;
import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getTotalHits;
/**
 * Common fixture for integration tests that run aggregations together with hybrid queries.
 *
 * <p>Holds the field names, sample values and aggregation names used by the fixture, creates and
 * populates the test index on demand, and offers a shared assertion for the hits section of a
 * search response. Members are {@code protected} so subclasses can reuse them.
 */
public class BaseAggregationsWithHybridQueryIT extends BaseNeuralSearchIT {

    // ---- text field: document contents and query terms ----
    protected static final String TEST_DOC_TEXT1 = "Hello world";
    protected static final String TEST_DOC_TEXT2 = "Hi to this place";
    protected static final String TEST_DOC_TEXT3 = "We would like to welcome everyone";
    protected static final String TEST_DOC_TEXT4 = "Hello, I'm glad to you see you pal";
    protected static final String TEST_DOC_TEXT5 = "People keep telling me orange but I still prefer pink";
    protected static final String TEST_DOC_TEXT6 = "She traveled because it cost the same as therapy and was a lot more enjoyable";
    protected static final String TEST_TEXT_FIELD_NAME_1 = "test-text-field-1";
    protected static final String TEST_QUERY_TEXT3 = "hello";
    protected static final String TEST_QUERY_TEXT4 = "cost";
    protected static final String TEST_QUERY_TEXT5 = "welcome";

    // ---- nested "user" object: sub-field names and sample values ----
    protected static final String NESTED_TYPE_FIELD_USER = "user";
    protected static final String NESTED_FIELD_FIRSTNAME = "firstname";
    protected static final String NESTED_FIELD_LASTNAME = "lastname";
    protected static final String NESTED_FIELD_FIRSTNAME_JOHN = "john";
    protected static final String NESTED_FIELD_LASTNAME_BLACK = "black";
    protected static final String NESTED_FIELD_FIRSTNAME_FRODO = "frodo";
    protected static final String NESTED_FIELD_LASTNAME_BAGGINS = "baggins";
    protected static final String NESTED_FIELD_FIRSTNAME_MOHAMMED = "mohammed";
    protected static final String NESTED_FIELD_LASTNAME_EZAB = "ezab";
    protected static final String NESTED_FIELD_FIRSTNAME_SUN = "sun";
    protected static final String NESTED_FIELD_LASTNAME_WUKONG = "wukong";
    protected static final String NESTED_FIELD_FIRSTNAME_VASILISA = "vasilisa";
    protected static final String NESTED_FIELD_LASTNAME_WISE = "the wise";

    // ---- integer field "doc_index" ----
    protected static final String INTEGER_FIELD_DOCINDEX = "doc_index";
    protected static final int INTEGER_FIELD_DOCINDEX_1234 = 1234;
    protected static final int INTEGER_FIELD_DOCINDEX_2345 = 2345;
    protected static final int INTEGER_FIELD_DOCINDEX_3456 = 3456;
    protected static final int INTEGER_FIELD_DOCINDEX_4567 = 4567;

    // ---- keyword field "doc_keyword" ----
    protected static final String KEYWORD_FIELD_DOCKEYWORD = "doc_keyword";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_WORKABLE = "workable";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_ANGRY = "angry";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_LIKABLE = "likeable";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_ENTIRE = "entire";

    // ---- date field "doc_date" ----
    protected static final String DATE_FIELD = "doc_date";
    protected static final String DATE_FIELD_01031995 = "01/03/1995";
    protected static final String DATE_FIELD_05022015 = "05/02/2015";
    protected static final String DATE_FIELD_07232007 = "07/23/2007";
    protected static final String DATE_FIELD_08212012 = "08/21/2012";

    // ---- integer field "doc_price" ----
    protected static final String INTEGER_FIELD_PRICE = "doc_price";
    protected static final int INTEGER_FIELD_PRICE_130 = 130;
    protected static final int INTEGER_FIELD_PRICE_100 = 100;
    protected static final int INTEGER_FIELD_PRICE_200 = 200;
    protected static final int INTEGER_FIELD_PRICE_25 = 25;
    protected static final int INTEGER_FIELD_PRICE_30 = 30;
    protected static final int INTEGER_FIELD_PRICE_350 = 350;

    // ---- aggregation names and response keys ----
    protected static final String BUCKET_AGG_DOC_COUNT_FIELD = "doc_count";
    protected static final String BUCKETS_AGGREGATION_NAME_1 = "date_buckets_1";
    protected static final String BUCKETS_AGGREGATION_NAME_2 = "date_buckets_2";
    protected static final String BUCKETS_AGGREGATION_NAME_3 = "date_buckets_3";
    protected static final String BUCKETS_AGGREGATION_NAME_4 = "date_buckets_4";
    protected static final String KEY = "key";
    protected static final String BUCKET_AGG_KEY_AS_STRING = "key_as_string";
    protected static final String SUM_AGGREGATION_NAME = "sum_aggs";
    protected static final String SUM_AGGREGATION_NAME_2 = "sum_aggs_2";
    protected static final String AVG_AGGREGATION_NAME = "avg_field";
    protected static final String GENERIC_AGGREGATION_NAME = "my_aggregation";
    protected static final String DATE_AGGREGATION_NAME = "date_aggregation";

    // ---- cluster settings ----
    protected static final String CLUSTER_SETTING_CONCURRENT_SEGMENT_SEARCH = "search.concurrent_segment_search.enabled";

    /**
     * One-time, per-class cluster preparation.
     *
     * <p>The setup hook must be static, but the client/settings helpers are instance methods, so a
     * throwaway instance of this class is created to reach them. Doing this once per class rather
     * than once per test keeps the number of network calls down.
     */
    @BeforeClass
    @SneakyThrows
    public static void setUpCluster() {
        BaseAggregationsWithHybridQueryIT bootstrap = new BaseAggregationsWithHybridQueryIT();
        bootstrap.initClient();
        bootstrap.updateClusterSettings();
    }

    /**
     * @return always {@code true}
     */
    @Override
    protected boolean preserveClusterUponCompletion() {
        return true;
    }

    /**
     * Makes sure the test index is present and creates the search pipeline.
     *
     * @param indexName    index to create and populate if it does not exist yet
     * @param pipelineName name passed to {@code createSearchPipelineWithResultsPostProcessor}
     */
    protected void prepareResources(String indexName, String pipelineName) {
        initializeIndexIfNotExist(indexName);
        createSearchPipelineWithResultsPostProcessor(pipelineName);
    }

    /**
     * Creates the index and loads the six sample documents, unless the index already exists,
     * in which case nothing is done.
     *
     * @param indexName index to create and populate
     */
    @SneakyThrows
    protected void initializeIndexIfNotExist(String indexName) {
        if (indexExists(indexName)) {
            return;
        }
        createAggregationTestIndex(indexName);
        ingestAggregationTestDocuments(indexName);
    }

    /**
     * Creates the index with a nested user field, the doc_index integer field, the doc_keyword
     * keyword field and the doc_date date field.
     */
    @SneakyThrows
    private void createAggregationTestIndex(String indexName) {
        createIndexWithConfiguration(
            indexName,
            buildIndexConfiguration(
                List.of(),
                List.of(NESTED_TYPE_FIELD_USER, NESTED_FIELD_FIRSTNAME, NESTED_FIELD_LASTNAME),
                List.of(INTEGER_FIELD_DOCINDEX),
                List.of(KEYWORD_FIELD_DOCKEYWORD),
                List.of(DATE_FIELD),
                // NOTE(review): presumably the shard count - confirm against buildIndexConfiguration
                3
            ),
            ""
        );
    }

    /**
     * Indexes documents "1" through "6". The two leading empty lists in every call are left empty
     * for all documents (presumably vector field names/values - confirm against addKnnDoc).
     */
    @SneakyThrows
    private void ingestAggregationTestDocuments(String indexName) {
        // doc 1: every optional field populated
        addKnnDoc(
            indexName,
            "1",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT1),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_JOHN, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_BLACK)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_1234, INTEGER_FIELD_PRICE_130),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_WORKABLE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_01031995)
        );
        // doc 2: no keyword value
        addKnnDoc(
            indexName,
            "2",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT3),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_FRODO, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_BAGGINS)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_2345, INTEGER_FIELD_PRICE_100),
            List.of(),
            List.of(),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_05022015)
        );
        // doc 3: price only, no doc_index value
        addKnnDoc(
            indexName,
            "3",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT2),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_MOHAMMED, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_EZAB)),
            List.of(INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_PRICE_200),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_ANGRY),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_07232007)
        );
        // doc 4: same date as doc 2
        addKnnDoc(
            indexName,
            "4",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT4),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_SUN, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_WUKONG)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_3456, INTEGER_FIELD_PRICE_25),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_LIKABLE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_05022015)
        );
        // doc 5: no nested user; same doc_index value as doc 4
        addKnnDoc(
            indexName,
            "5",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT5),
            List.of(),
            List.of(),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_3456, INTEGER_FIELD_PRICE_30),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_ENTIRE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_08212012)
        );
        // doc 6: same keyword and date as doc 5
        addKnnDoc(
            indexName,
            "6",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT6),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_VASILISA, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_WISE)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_4567, INTEGER_FIELD_PRICE_350),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_ENTIRE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_08212012)
        );
    }

    /**
     * Checks the hits section of a search response: the hit count equals {@code expected}, scores
     * never increase from one hit to the next, document ids are distinct, and the reported total
     * is exactly {@code expected} with relation {@code RELATION_EQUAL_TO}.
     *
     * @param expected            number of hits the response must contain and report as its total
     * @param searchResponseAsMap search response parsed into a map
     */
    protected void assertHitResultsFromQuery(int expected, Map<String, Object> searchResponseAsMap) {
        assertEquals(expected, getHitCount(searchResponseAsMap));

        List<Map<String, Object>> hits = getNestedHits(searchResponseAsMap);
        List<String> docIds = new ArrayList<>();
        List<Double> docScores = new ArrayList<>();
        for (Map<String, Object> hit : hits) {
            docIds.add((String) hit.get("_id"));
            docScores.add((Double) hit.get("_score"));
        }

        // scores must be in descending order: no hit may score lower than the one that follows it
        boolean sortedDescending = IntStream.range(0, docScores.size() - 1)
            .allMatch(pos -> !(docScores.get(pos) < docScores.get(pos + 1)));
        assertTrue(sortedDescending);

        // ids must be unique: collapsing them into a set must not lose any element
        int distinctIdCount = Set.copyOf(docIds).size();
        assertEquals(distinctIdCount, docIds.size());

        Map<String, Object> totalHits = getTotalHits(searchResponseAsMap);
        Object totalValue = totalHits.get("value");
        assertNotNull(totalValue);
        assertEquals(expected, totalValue);
        Object totalRelation = totalHits.get("relation");
        assertNotNull(totalRelation);
        assertEquals(RELATION_EQUAL_TO, totalRelation);
    }
}