Skip to content

Commit fbbca1a

Browse files
Allowing execution of hybrid query on index alias with filters (#670)
* Add support for index alias with filter Signed-off-by: Martin Gaievski <gaievski@amazon.com>
1 parent cc6a6b2 commit fbbca1a

File tree

8 files changed

+385
-88
lines changed

8 files changed

+385
-88
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1818
## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.13...2.x)
1919
### Features
2020
### Enhancements
21+
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
2122
### Bug Fixes
2223
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
2324
### Infrastructure

src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java

+22-2
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,32 @@ public final class HybridQuery extends Query implements Iterable<Query> {
2929

3030
private final List<Query> subQueries;
3131

32-
public HybridQuery(Collection<Query> subQueries) {
32+
/**
33+
* Create new instance of hybrid query object based on collection of sub queries and filter query
34+
* @param subQueries collection of queries that are executed individually and contribute to a final list of combined scores
35+
* @param filterQueries list of filters that will be applied to each sub query. Each filter from the list is added as bool "filter" clause. If this is null sub queries will be executed as is
36+
*/
37+
public HybridQuery(final Collection<Query> subQueries, final List<Query> filterQueries) {
3338
Objects.requireNonNull(subQueries, "collection of queries must not be null");
3439
if (subQueries.isEmpty()) {
3540
throw new IllegalArgumentException("collection of queries must not be empty");
3641
}
37-
this.subQueries = new ArrayList<>(subQueries);
42+
if (Objects.isNull(filterQueries) || filterQueries.isEmpty()) {
43+
this.subQueries = new ArrayList<>(subQueries);
44+
} else {
45+
List<Query> modifiedSubQueries = new ArrayList<>();
46+
for (Query subQuery : subQueries) {
47+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
48+
builder.add(subQuery, BooleanClause.Occur.MUST);
49+
filterQueries.forEach(filterQuery -> builder.add(filterQuery, BooleanClause.Occur.FILTER));
50+
modifiedSubQueries.add(builder.build());
51+
}
52+
this.subQueries = modifiedSubQueries;
53+
}
54+
}
55+
56+
public HybridQuery(final Collection<Query> subQueries) {
57+
this(subQueries, List.of());
3858
}
3959

4060
/**

src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java

+14-11
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
import java.io.IOException;
88
import java.util.LinkedList;
99
import java.util.List;
10+
import java.util.stream.Collectors;
1011

1112
import com.google.common.annotations.VisibleForTesting;
1213
import org.apache.lucene.search.BooleanClause;
1314
import org.apache.lucene.search.BooleanQuery;
1415
import org.apache.lucene.search.Query;
1516
import org.opensearch.common.settings.Settings;
1617
import org.opensearch.index.mapper.MapperService;
17-
import org.opensearch.index.search.NestedHelper;
1818
import org.opensearch.neuralsearch.query.HybridQuery;
1919
import org.opensearch.search.aggregations.AggregationProcessor;
2020
import org.opensearch.search.internal.ContextIndexSearcher;
@@ -25,6 +25,8 @@
2525

2626
import lombok.extern.log4j.Log4j2;
2727

28+
import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasAliasFilter;
29+
import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasNestedFieldOrNestedDocs;
2830
import static org.opensearch.neuralsearch.util.HybridQueryUtil.isHybridQuery;
2931

3032
/**
@@ -51,26 +53,27 @@ public boolean searchWith(
5153
}
5254
}
5355

54-
private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
55-
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
56-
}
57-
5856
private static boolean isWrappedHybridQuery(final Query query) {
5957
return query instanceof BooleanQuery
6058
&& ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.getQuery() instanceof HybridQuery);
6159
}
6260

6361
@VisibleForTesting
6462
protected Query extractHybridQuery(final SearchContext searchContext, final Query query) {
65-
if (hasNestedFieldOrNestedDocs(query, searchContext)
63+
if ((hasAliasFilter(query, searchContext) || hasNestedFieldOrNestedDocs(query, searchContext))
6664
&& isWrappedHybridQuery(query)
67-
&& ((BooleanQuery) query).clauses().size() > 0) {
68-
// extract hybrid query and replace bool with hybrid query
65+
&& !((BooleanQuery) query).clauses().isEmpty()) {
6966
List<BooleanClause> booleanClauses = ((BooleanQuery) query).clauses();
70-
if (booleanClauses.isEmpty() || booleanClauses.get(0).getQuery() instanceof HybridQuery == false) {
71-
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level bool query");
67+
if (!(booleanClauses.get(0).getQuery() instanceof HybridQuery)) {
68+
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level query");
7269
}
73-
return booleanClauses.get(0).getQuery();
70+
HybridQuery hybridQuery = (HybridQuery) booleanClauses.stream().findFirst().get().getQuery();
71+
List<Query> filterQueries = booleanClauses.stream()
72+
.filter(clause -> BooleanClause.Occur.FILTER == clause.getOccur())
73+
.map(BooleanClause::getQuery)
74+
.collect(Collectors.toList());
75+
HybridQuery hybridQueryWithFilter = new HybridQuery(hybridQuery.getSubQueries(), filterQueries);
76+
return hybridQueryWithFilter;
7477
}
7578
return query;
7679
}

src/main/java/org/opensearch/neuralsearch/util/HybridQueryUtil.java

+21-25
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66

77
import lombok.AccessLevel;
88
import lombok.NoArgsConstructor;
9-
import org.apache.lucene.search.BooleanClause;
109
import org.apache.lucene.search.BooleanQuery;
11-
import org.apache.lucene.search.FieldExistsQuery;
1210
import org.apache.lucene.search.Query;
13-
import org.opensearch.index.mapper.SeqNoFieldMapper;
1411
import org.opensearch.index.search.NestedHelper;
1512
import org.opensearch.neuralsearch.query.HybridQuery;
1613
import org.opensearch.search.internal.SearchContext;
1714

15+
import java.util.Objects;
16+
1817
/**
1918
* Utility class for anything related to hybrid query
2019
*/
@@ -24,7 +23,7 @@ public class HybridQueryUtil {
2423
public static boolean isHybridQuery(final Query query, final SearchContext searchContext) {
2524
if (query instanceof HybridQuery) {
2625
return true;
27-
} else if (isWrappedHybridQuery(query) && hasNestedFieldOrNestedDocs(query, searchContext)) {
26+
} else if (isWrappedHybridQuery(query)) {
2827
/* Checking if this is a hybrid query that is wrapped into a Bool query by core Opensearch code
2928
https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/DefaultSearchContext.java#L367-L370.
3029
main reason for that is performance optimization, at time of writing we are ok with loosing on performance if that's unblocks
@@ -34,38 +33,35 @@ public static boolean isHybridQuery(final Query query, final SearchContext searc
3433
below is sample structure of such query:
3534
3635
Boolean {
37-
should: {
38-
hybrid: {
39-
sub_query1 {}
40-
sub_query2 {}
41-
}
42-
}
43-
filter: {
44-
exists: {
45-
field: "_primary_term"
46-
}
47-
}
36+
should: {
37+
hybrid: {
38+
sub_query1 {}
39+
sub_query2 {}
40+
}
41+
}
42+
filter: {
43+
exists: {
44+
field: "_primary_term"
45+
}
46+
}
4847
}
49-
TODO Need to add logic for passing hybrid sub-queries through the same logic in core to ensure there is no latency regression */
48+
*/
5049
// we have already checked if query in instance of Boolean in higher level else if condition
51-
return ((BooleanQuery) query).clauses()
52-
.stream()
53-
.filter(clause -> clause.getQuery() instanceof HybridQuery == false)
54-
.allMatch(clause -> {
55-
return clause.getOccur() == BooleanClause.Occur.FILTER
56-
&& clause.getQuery() instanceof FieldExistsQuery
57-
&& SeqNoFieldMapper.PRIMARY_TERM_NAME.equals(((FieldExistsQuery) clause.getQuery()).getField());
58-
});
50+
return hasNestedFieldOrNestedDocs(query, searchContext) || hasAliasFilter(query, searchContext);
5951
}
6052
return false;
6153
}
6254

63-
private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
55+
public static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
6456
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
6557
}
6658

6759
private static boolean isWrappedHybridQuery(final Query query) {
6860
return query instanceof BooleanQuery
6961
&& ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.getQuery() instanceof HybridQuery);
7062
}
63+
64+
public static boolean hasAliasFilter(final Query query, final SearchContext searchContext) {
65+
return Objects.nonNull(searchContext.aliasFilter());
66+
}
7167
}

src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java

+122-6
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.opensearch.index.query.BoolQueryBuilder;
2929
import org.opensearch.index.query.MatchQueryBuilder;
3030
import org.opensearch.index.query.NestedQueryBuilder;
31+
import org.opensearch.index.query.QueryBuilder;
3132
import org.opensearch.index.query.QueryBuilders;
3233
import org.opensearch.index.query.RangeQueryBuilder;
3334
import org.opensearch.index.query.TermQueryBuilder;
@@ -40,6 +41,7 @@
4041
public class HybridQueryIT extends BaseNeuralSearchIT {
4142
private static final String TEST_BASIC_INDEX_NAME = "test-hybrid-basic-index";
4243
private static final String TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME = "test-hybrid-vector-doc-field-index";
44+
private static final String TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME = "test-hybrid-multi-doc-nested-fields-index";
4345
private static final String TEST_MULTI_DOC_INDEX_NAME = "test-hybrid-multi-doc-index";
4446
private static final String TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD = "test-hybrid-multi-doc-single-shard-index";
4547
private static final String TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD =
@@ -256,7 +258,7 @@ public void testComplexQuery_whenMultipleIdenticalSubQueries_thenSuccessful() {
256258
public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult() {
257259
String modelId = null;
258260
try {
259-
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME);
261+
initializeIndexIfNotExist(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME);
260262
modelId = prepareModel();
261263
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
262264
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT);
@@ -266,7 +268,7 @@ public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult(
266268
hybridQueryBuilderOnlyTerm.add(termQuery2Builder);
267269

268270
Map<String, Object> searchResponseAsMap = search(
269-
TEST_MULTI_DOC_INDEX_NAME,
271+
TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME,
270272
hybridQueryBuilderOnlyTerm,
271273
null,
272274
10,
@@ -283,7 +285,7 @@ public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult(
283285
assertNotNull(total.get("relation"));
284286
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
285287
} finally {
286-
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
288+
wipeOfTestResources(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
287289
}
288290
}
289291

@@ -578,6 +580,102 @@ public void testRequestCache_whenMultipleShardsQueryReturnResults_thenSuccessful
578580
}
579581
}
580582

583+
@SneakyThrows
584+
public void testWrappedQueryWithFilter_whenIndexAliasHasFilterAndIndexWithNestedFields_thenSuccess() {
585+
String modelId = null;
586+
String alias = "alias_with_filter";
587+
try {
588+
initializeIndexIfNotExist(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME);
589+
modelId = prepareModel();
590+
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
591+
// create alias for index
592+
QueryBuilder aliasFilter = QueryBuilders.boolQuery()
593+
.mustNot(QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
594+
createIndexAlias(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, alias, aliasFilter);
595+
596+
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
597+
TEST_KNN_VECTOR_FIELD_NAME_1,
598+
TEST_QUERY_TEXT,
599+
"",
600+
modelId,
601+
5,
602+
null,
603+
null
604+
);
605+
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
606+
hybridQueryBuilder.add(neuralQueryBuilder);
607+
608+
Map<String, Object> searchResponseAsMap = search(
609+
alias,
610+
hybridQueryBuilder,
611+
null,
612+
10,
613+
Map.of("search_pipeline", SEARCH_PIPELINE)
614+
);
615+
616+
assertEquals(2, getHitCount(searchResponseAsMap));
617+
assertTrue(getMaxScore(searchResponseAsMap).isPresent());
618+
assertEquals(1.0f, getMaxScore(searchResponseAsMap).get(), DELTA_FOR_SCORE_ASSERTION);
619+
620+
Map<String, Object> total = getTotalHits(searchResponseAsMap);
621+
assertNotNull(total.get("value"));
622+
assertEquals(2, total.get("value"));
623+
assertNotNull(total.get("relation"));
624+
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
625+
} finally {
626+
deleteIndexAlias(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, alias);
627+
wipeOfTestResources(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
628+
}
629+
}
630+
631+
@SneakyThrows
632+
public void testWrappedQueryWithFilter_whenIndexAliasHasFilters_thenSuccess() {
633+
String modelId = null;
634+
String alias = "alias_with_filter";
635+
try {
636+
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME);
637+
modelId = prepareModel();
638+
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
639+
// create alias for index
640+
QueryBuilder aliasFilter = QueryBuilders.boolQuery()
641+
.mustNot(QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
642+
createIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias, aliasFilter);
643+
644+
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
645+
TEST_KNN_VECTOR_FIELD_NAME_1,
646+
TEST_QUERY_TEXT,
647+
"",
648+
modelId,
649+
5,
650+
null,
651+
null
652+
);
653+
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
654+
hybridQueryBuilder.add(neuralQueryBuilder);
655+
656+
Map<String, Object> searchResponseAsMap = search(
657+
alias,
658+
hybridQueryBuilder,
659+
null,
660+
10,
661+
Map.of("search_pipeline", SEARCH_PIPELINE)
662+
);
663+
664+
assertEquals(2, getHitCount(searchResponseAsMap));
665+
assertTrue(getMaxScore(searchResponseAsMap).isPresent());
666+
assertEquals(1.0f, getMaxScore(searchResponseAsMap).get(), DELTA_FOR_SCORE_ASSERTION);
667+
668+
Map<String, Object> total = getTotalHits(searchResponseAsMap);
669+
assertNotNull(total.get("value"));
670+
assertEquals(2, total.get("value"));
671+
assertNotNull(total.get("relation"));
672+
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
673+
} finally {
674+
deleteIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias);
675+
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
676+
}
677+
}
678+
581679
@SneakyThrows
582680
private void initializeIndexIfNotExist(String indexName) throws IOException {
583681
if (TEST_BASIC_INDEX_NAME.equals(indexName) && !indexExists(TEST_BASIC_INDEX_NAME)) {
@@ -628,10 +726,28 @@ private void initializeIndexIfNotExist(String indexName) throws IOException {
628726
assertEquals(3, getDocCount(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME));
629727
}
630728

729+
if (TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME.equals(indexName) && !indexExists(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME)) {
730+
createIndexWithConfiguration(
731+
indexName,
732+
buildIndexConfiguration(
733+
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)),
734+
List.of(TEST_NESTED_TYPE_FIELD_NAME_1),
735+
1
736+
),
737+
""
738+
);
739+
addDocsToIndex(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME);
740+
}
741+
631742
if (TEST_MULTI_DOC_INDEX_NAME.equals(indexName) && !indexExists(TEST_MULTI_DOC_INDEX_NAME)) {
632-
prepareKnnIndex(
633-
TEST_MULTI_DOC_INDEX_NAME,
634-
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE))
743+
createIndexWithConfiguration(
744+
indexName,
745+
buildIndexConfiguration(
746+
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)),
747+
List.of(),
748+
1
749+
),
750+
""
635751
);
636752
addDocsToIndex(TEST_MULTI_DOC_INDEX_NAME);
637753
}

0 commit comments

Comments
 (0)