Skip to content

Commit 72ac22b

Browse files
Improve performance of bitmap terms filtering (#16936)
--------- Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> (cherry picked from commit ba0c4f3) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent c2c15bb commit 72ac22b

File tree

7 files changed

+586
-80
lines changed

7 files changed

+586
-80
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2424
- Introduce framework for auxiliary transports and an experimental gRPC transport plugin ([#16534](https://github.com/opensearch-project/OpenSearch/pull/16534))
2525
- Support searching from doc_value using termQueryCaseInsensitive/termQuery in flat_object/keyword field ([#16974](https://github.com/opensearch-project/OpenSearch/pull/16974/))
2626
- Added a new `time` field to replace the deprecated `getTime` field in `GetStats`. ([#17009](https://github.com/opensearch-project/OpenSearch/pull/17009))
27+
- Improve performance of the bitmap filtering ([#16936](https://github.com/opensearch-project/OpenSearch/pull/16936/))
2728

2829
### Dependencies
2930
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))

server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java

+3-39
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,8 @@
4646
import org.apache.lucene.sandbox.document.HalfFloatPoint;
4747
import org.apache.lucene.search.BoostQuery;
4848
import org.apache.lucene.search.IndexOrDocValuesQuery;
49-
import org.apache.lucene.search.IndexSearcher;
5049
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
5150
import org.apache.lucene.search.MatchNoDocsQuery;
52-
import org.apache.lucene.search.PointInSetQuery;
5351
import org.apache.lucene.search.Query;
5452
import org.apache.lucene.util.BytesRef;
5553
import org.apache.lucene.util.NumericUtils;
@@ -73,6 +71,7 @@
7371
import org.opensearch.search.DocValueFormat;
7472
import org.opensearch.search.lookup.SearchLookup;
7573
import org.opensearch.search.query.BitmapDocValuesQuery;
74+
import org.opensearch.search.query.BitmapIndexQuery;
7675

7776
import java.io.IOException;
7877
import java.math.BigInteger;
@@ -81,7 +80,6 @@
8180
import java.util.ArrayList;
8281
import java.util.Arrays;
8382
import java.util.Collections;
84-
import java.util.Iterator;
8583
import java.util.List;
8684
import java.util.Map;
8785
import java.util.Objects;
@@ -888,10 +886,10 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha
888886
}
889887

890888
if (isSearchable && hasDocValues) {
891-
return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap));
889+
return new IndexOrDocValuesQuery(new BitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap));
892890
}
893891
if (isSearchable) {
894-
return bitmapIndexQuery(field, bitmap);
892+
return new BitmapIndexQuery(field, bitmap);
895893
}
896894
return new BitmapDocValuesQuery(field, bitmap);
897895
}
@@ -1507,40 +1505,6 @@ public static Query unsignedLongRangeQuery(
15071505
}
15081506
return builder.apply(l, u);
15091507
}
1510-
1511-
static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) {
1512-
final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]);
1513-
return new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() {
1514-
1515-
final Iterator<Integer> iterator = bitmap.iterator();
1516-
1517-
@Override
1518-
public BytesRef next() {
1519-
int value;
1520-
if (iterator.hasNext()) {
1521-
value = iterator.next();
1522-
} else {
1523-
return null;
1524-
}
1525-
IntPoint.encodeDimension(value, encoded.bytes, 0);
1526-
return encoded;
1527-
}
1528-
}) {
1529-
@Override
1530-
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
1531-
if (bitmap.isEmpty()) {
1532-
return new MatchNoDocsQuery();
1533-
}
1534-
return super.rewrite(indexSearcher);
1535-
}
1536-
1537-
@Override
1538-
protected String toString(byte[] value) {
1539-
assert value.length == Integer.BYTES;
1540-
return Integer.toString(IntPoint.decodeDimension(value, 0));
1541-
}
1542-
};
1543-
}
15441508
}
15451509

15461510
/**

server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java

+6-4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030

3131
import org.roaringbitmap.RoaringBitmap;
3232

33+
import static org.opensearch.search.query.BitmapIndexQuery.checkArgs;
34+
3335
/**
3436
* Filter with bitmap
3537
* <p>
@@ -43,6 +45,7 @@ public class BitmapDocValuesQuery extends Query implements Accountable {
4345
final long max;
4446

4547
public BitmapDocValuesQuery(String field, RoaringBitmap bitmap) {
48+
checkArgs(field, bitmap);
4649
this.field = field;
4750
this.bitmap = bitmap;
4851
if (!bitmap.isEmpty()) {
@@ -111,8 +114,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
111114

112115
@Override
113116
public String toString(String field) {
114-
// bitmap may contain high cardinality, so choose to not show the actual values in it
115-
return field + " cardinality: " + bitmap.getLongCardinality();
117+
return "BitmapDocValuesQuery(field=" + this.field + ")";
116118
}
117119

118120
@Override
@@ -139,8 +141,8 @@ public int hashCode() {
139141

140142
@Override
141143
public long ramBytesUsed() {
142-
return RamUsageEstimator.shallowSizeOfInstance(BitmapDocValuesQuery.class) + RamUsageEstimator.sizeOfObject(field)
143-
+ RamUsageEstimator.sizeOfObject(bitmap);
144+
return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOf(field) + bitmap
145+
.getLongSizeInBytes();
144146
}
145147

146148
@Override

0 commit comments

Comments
 (0)