7
7
import org .apache .lucene .index .Term ;
8
8
import org .apache .lucene .search .BooleanClause ;
9
9
import org .apache .lucene .search .BooleanQuery ;
10
+ import org .apache .lucene .search .Query ;
10
11
import org .apache .lucene .search .TermQuery ;
11
12
import org .opensearch .neuralsearch .highlight .extractor .BooleanQueryTextExtractor ;
12
13
import org .opensearch .neuralsearch .highlight .extractor .NeuralQueryTextExtractor ;
13
14
import org .opensearch .neuralsearch .highlight .extractor .QueryTextExtractorRegistry ;
14
15
import org .opensearch .neuralsearch .highlight .extractor .TermQueryTextExtractor ;
16
+ import org .opensearch .neuralsearch .highlight .extractor .HybridQueryTextExtractor ;
15
17
import org .opensearch .neuralsearch .query .NeuralKNNQuery ;
18
+ import org .opensearch .neuralsearch .query .HybridQuery ;
19
+ import org .opensearch .neuralsearch .query .HybridQueryContext ;
16
20
import org .opensearch .test .OpenSearchTestCase ;
17
21
22
+ import java .util .ArrayList ;
23
+ import java .util .List ;
24
+
18
25
import static org .mockito .Mockito .mock ;
19
26
import static org .mockito .Mockito .when ;
20
27
@@ -48,8 +55,14 @@ public void testTermQueryExtractor() {
48
55
49
56
// Test with non-TermQuery
50
57
BooleanQuery booleanQuery = new BooleanQuery .Builder ().build ();
51
- result = extractor .extractQueryText (booleanQuery , "content" );
52
- assertEquals ("Should return empty string for non-TermQuery" , "" , result );
58
+ IllegalArgumentException exception = expectThrows (
59
+ IllegalArgumentException .class ,
60
+ () -> extractor .extractQueryText (booleanQuery , "content" )
61
+ );
62
+ assertTrue (
63
+ "Should throw IllegalArgumentException with correct message" ,
64
+ exception .getMessage ().contains ("Expected TermQuery but got BooleanQuery" )
65
+ );
53
66
}
54
67
55
68
/**
@@ -67,8 +80,14 @@ public void testNeuralQueryExtractor() {
67
80
68
81
// Test with non-NeuralKNNQuery
69
82
TermQuery termQuery = new TermQuery (new Term ("content" , "term" ));
70
- result = extractor .extractQueryText (termQuery , "content" );
71
- assertEquals ("Should return empty string for non-NeuralKNNQuery" , "" , result );
83
+ IllegalArgumentException exception = expectThrows (
84
+ IllegalArgumentException .class ,
85
+ () -> extractor .extractQueryText (termQuery , "content" )
86
+ );
87
+ assertTrue (
88
+ "Should throw IllegalArgumentException with correct message" ,
89
+ exception .getMessage ().contains ("Expected NeuralKNNQuery but got TermQuery" )
90
+ );
72
91
}
73
92
74
93
/**
@@ -91,8 +110,14 @@ public void testBooleanQueryExtractor() {
91
110
92
111
// Test with non-BooleanQuery
93
112
TermQuery termQuery = new TermQuery (new Term ("content" , "term" ));
94
- result = extractor .extractQueryText (termQuery , "content" );
95
- assertEquals ("Should return empty string for non-BooleanQuery" , "" , result );
113
+ IllegalArgumentException exception = expectThrows (
114
+ IllegalArgumentException .class ,
115
+ () -> extractor .extractQueryText (termQuery , "content" )
116
+ );
117
+ assertTrue (
118
+ "Should throw IllegalArgumentException with correct message" ,
119
+ exception .getMessage ().contains ("Expected BooleanQuery but got TermQuery" )
120
+ );
96
121
97
122
// Test with empty clauses
98
123
BooleanQuery emptyQuery = new BooleanQuery .Builder ().build ();
@@ -160,4 +185,111 @@ public void visit(org.apache.lucene.search.QueryVisitor visitor) {
160
185
result = registry .extractQueryText (customQuery , "content" );
161
186
assertEquals ("Should use custom extractor" , "custom-extracted" , result );
162
187
}
188
+
189
+ /**
190
+ * Tests the HybridQueryTextExtractor
191
+ */
192
+ public void testHybridQueryExtractor () {
193
+ // Create a hybrid query with multiple sub-queries
194
+ List <Query > subQueries = new ArrayList <>();
195
+
196
+ // Add a term query
197
+ TermQuery termQuery = new TermQuery (new Term ("content" , "machine" ));
198
+ subQueries .add (termQuery );
199
+
200
+ // Add a boolean query (match query)
201
+ BooleanQuery .Builder boolBuilder = new BooleanQuery .Builder ();
202
+ boolBuilder .add (new TermQuery (new Term ("content" , "learning" )), BooleanClause .Occur .MUST );
203
+ subQueries .add (boolBuilder .build ());
204
+
205
+ // Add a neural query
206
+ NeuralKNNQuery neuralQuery = mock (NeuralKNNQuery .class );
207
+ when (neuralQuery .getOriginalQueryText ()).thenReturn ("AI systems that can learn" );
208
+ subQueries .add (neuralQuery );
209
+
210
+ // Create the hybrid query
211
+ HybridQuery hybridQuery = new HybridQuery (subQueries , HybridQueryContext .builder ().build ());
212
+
213
+ // Test extraction
214
+ String result = registry .extractQueryText (hybridQuery , "content" );
215
+ assertEquals ("Should combine all query texts correctly" , "machine learning AI systems that can learn" , result );
216
+
217
+ // Test with non-HybridQuery
218
+ TermQuery nonHybridQuery = new TermQuery (new Term ("content" , "term" ));
219
+ IllegalArgumentException exception = expectThrows (IllegalArgumentException .class , () -> {
220
+ HybridQueryTextExtractor extractor = new HybridQueryTextExtractor (registry );
221
+ extractor .extractQueryText (nonHybridQuery , "content" );
222
+ });
223
+ assertTrue (
224
+ "Should throw IllegalArgumentException with correct message" ,
225
+ exception .getMessage ().contains ("Expected HybridQuery but got TermQuery" )
226
+ );
227
+ }
228
+
229
+ /**
230
+ * Tests the HybridQueryTextExtractor with empty or invalid sub-queries
231
+ */
232
+ public void testHybridQueryExtractorWithEmptyQueries () {
233
+ // Create a hybrid query with no valid text
234
+ List <Query > subQueries = new ArrayList <>();
235
+
236
+ // Add a term query with non-matching field
237
+ TermQuery termQuery = new TermQuery (new Term ("title" , "machine" ));
238
+ subQueries .add (termQuery );
239
+
240
+ // Create the hybrid query
241
+ HybridQuery hybridQuery = new HybridQuery (subQueries , HybridQueryContext .builder ().build ());
242
+
243
+ // Test extraction
244
+ String result = registry .extractQueryText (hybridQuery , "content" );
245
+ assertEquals ("Should return empty string for no valid text" , "" , result );
246
+ }
247
+
248
+ /**
249
+ * Tests the HybridQueryTextExtractor with duplicate texts
250
+ */
251
+ public void testHybridQueryExtractorWithDuplicates () {
252
+ List <Query > subQueries = new ArrayList <>();
253
+
254
+ // Add two term queries with the same text
255
+ subQueries .add (new TermQuery (new Term ("content" , "duplicate" )));
256
+ subQueries .add (new TermQuery (new Term ("content" , "duplicate" )));
257
+
258
+ // Add a neural query with overlapping text
259
+ NeuralKNNQuery neuralQuery = mock (NeuralKNNQuery .class );
260
+ when (neuralQuery .getOriginalQueryText ()).thenReturn ("duplicate text" );
261
+ subQueries .add (neuralQuery );
262
+
263
+ // Create the hybrid query
264
+ HybridQuery hybridQuery = new HybridQuery (subQueries , HybridQueryContext .builder ().build ());
265
+
266
+ // Test extraction
267
+ String result = registry .extractQueryText (hybridQuery , "content" );
268
+ assertEquals ("Should deduplicate terms" , "duplicate duplicate text" , result );
269
+ }
270
+
271
+ /**
272
+ * Tests the HybridQueryTextExtractor with nested queries
273
+ */
274
+ public void testHybridQueryExtractorWithNestedQueries () {
275
+ List <Query > subQueries = new ArrayList <>();
276
+
277
+ // Create a boolean query with multiple terms
278
+ BooleanQuery .Builder boolBuilder = new BooleanQuery .Builder ();
279
+ boolBuilder .add (new TermQuery (new Term ("content" , "nested" )), BooleanClause .Occur .MUST );
280
+ boolBuilder .add (new TermQuery (new Term ("content" , "terms" )), BooleanClause .Occur .MUST );
281
+ subQueries .add (boolBuilder .build ());
282
+
283
+ // Add a neural query
284
+ NeuralKNNQuery neuralQuery = mock (NeuralKNNQuery .class );
285
+ when (neuralQuery .getOriginalQueryText ()).thenReturn ("neural text" );
286
+ subQueries .add (neuralQuery );
287
+
288
+ // Create the hybrid query
289
+ HybridQuery hybridQuery = new HybridQuery (subQueries , HybridQueryContext .builder ().build ());
290
+
291
+ // Test extraction
292
+ String result = registry .extractQueryText (hybridQuery , "content" );
293
+ assertEquals ("Should handle nested queries correctly" , "nested terms neural text" , result );
294
+ }
163
295
}
0 commit comments