Skip to content

Commit e80b830

Browse files
committed
implement bitmap set query
Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com>
1 parent 759b855 commit e80b830

File tree

5 files changed

+416
-37
lines changed

5 files changed

+416
-37
lines changed

server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java

+4-5
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
import java.util.function.Function;
9898
import java.util.function.Supplier;
9999

100+
import org.opensearch.search.query.BitmapIndexQuery;
100101
import org.roaringbitmap.RoaringBitmap;
101102

102103
/**
@@ -895,10 +896,10 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha
895896
}
896897

897898
if (isSearchable && hasDocValues) {
898-
return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap));
899+
return new IndexOrDocValuesQuery(new BitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap));
899900
}
900901
if (isSearchable) {
901-
return bitmapIndexQuery(field, bitmap);
902+
return new BitmapIndexQuery(field, bitmap);
902903
}
903904
return new BitmapDocValuesQuery(field, bitmap);
904905
}
@@ -1551,12 +1552,9 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
15511552
return new ScorerSupplier() {
15521553
@Override
15531554
public Scorer get(long leadCost) throws IOException {
1554-
15551555
final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]);
15561556
Query query = new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() {
1557-
15581557
final Iterator<Integer> iterator = bitmap.iterator();
1559-
15601558
@Override
15611559
public BytesRef next() {
15621560
int value;
@@ -1583,6 +1581,7 @@ protected String toString(byte[] value) {
15831581
return Integer.toString(IntPoint.decodeDimension(value, 0));
15841582
}
15851583
};
1584+
15861585
return query.createWeight(searcher, scoreMode, boost).scorer(context);
15871586
}
15881587

server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
111111

112112
@Override
113113
public String toString(String field) {
114-
// bitmap may contain high cardinality, so choose to not show the actual values in it
115-
return field + " cardinality: " + bitmap.getLongCardinality();
114+
return "BitmapDocValuesQuery(field=" + field + ")";
116115
}
117116

118117
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.search.query;
10+
11+
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.roaringbitmap.PeekableIntIterator;
import org.roaringbitmap.RoaringBitmap;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.Objects;
37+
38+
/**
39+
* A query that matches all documents that contain a set of integer numbers represented by bitmap
40+
*
41+
* @opensearch.internal
42+
*/
43+
public class BitmapIndexQuery extends Query implements Accountable {
44+
45+
private final RoaringBitmap bitmap;
46+
private final String field;
47+
48+
public BitmapIndexQuery(String field, RoaringBitmap bitmap) {
49+
this.bitmap = bitmap;
50+
this.field = field;
51+
}
52+
53+
private static BytesRefIterator bitmapEncodedIterator(RoaringBitmap bitmap) {
54+
return new BytesRefIterator() {
55+
private final Iterator<Integer> iterator = bitmap.iterator();
56+
private final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]);
57+
58+
@Override
59+
public BytesRef next() {
60+
int value;
61+
if (iterator.hasNext()) {
62+
value = iterator.next();
63+
} else {
64+
return null;
65+
}
66+
IntPoint.encodeDimension(value, encoded.bytes, 0);
67+
return encoded;
68+
}
69+
};
70+
}
71+
72+
@Override
73+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
74+
return new ConstantScoreWeight(this, boost) {
75+
@Override
76+
public Scorer scorer(LeafReaderContext context) throws IOException {
77+
ScorerSupplier scorerSupplier = scorerSupplier(context);
78+
if (scorerSupplier == null) {
79+
return null;
80+
}
81+
return scorerSupplier.get(Long.MAX_VALUE);
82+
}
83+
84+
@Override
85+
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
86+
final Weight weight = this;
87+
LeafReader reader = context.reader();
88+
// get point value
89+
// only works for one dimension
90+
PointValues values = reader.getPointValues(field);
91+
if (values == null) {
92+
return null;
93+
}
94+
if (values.getNumIndexDimensions() != 1) {
95+
throw new IllegalArgumentException("field must have only one dimension");
96+
}
97+
98+
return new ScorerSupplier() {
99+
long cost = -1; // calculate lazily, and only once
100+
101+
@Override
102+
public Scorer get(long leadCost) throws IOException {
103+
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
104+
MergePointVisitor visitor = new MergePointVisitor(result);
105+
values.intersect(visitor);
106+
return new ConstantScoreScorer(weight, score(), scoreMode, result.build().iterator());
107+
}
108+
109+
@Override
110+
public long cost() {
111+
if (cost == -1) {
112+
cost = bitmap.getLongCardinality();
113+
}
114+
return cost;
115+
}
116+
};
117+
}
118+
119+
@Override
120+
public boolean isCacheable(LeafReaderContext ctx) {
121+
// This query depend only on segment-immutable structure points
122+
return true;
123+
}
124+
};
125+
}
126+
127+
private class MergePointVisitor implements PointValues.IntersectVisitor {
128+
private final DocIdSetBuilder result;
129+
private final BytesRefIterator iterator;
130+
private BytesRef nextQueryPoint;
131+
private final ArrayUtil.ByteArrayComparator comparator;
132+
private DocIdSetBuilder.BulkAdder adder;
133+
134+
public MergePointVisitor(DocIdSetBuilder result)
135+
throws IOException {
136+
this.result = result;
137+
this.comparator = ArrayUtil.getUnsignedComparator(Integer.BYTES);
138+
this.iterator = bitmapEncodedIterator(bitmap);
139+
nextQueryPoint = iterator.next();
140+
}
141+
142+
@Override
143+
public void grow(int count) {
144+
adder = result.grow(count);
145+
}
146+
147+
@Override
148+
public void visit(int docID) {
149+
adder.add(docID);
150+
}
151+
152+
@Override
153+
public void visit(DocIdSetIterator iterator) throws IOException {
154+
adder.add(iterator);
155+
}
156+
157+
@Override
158+
public void visit(int docID, byte[] packedValue) {
159+
if (matches(packedValue)) {
160+
visit(docID);
161+
}
162+
}
163+
164+
@Override
165+
public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
166+
if (matches(packedValue)) {
167+
adder.add(iterator);
168+
}
169+
}
170+
171+
private boolean matches(byte[] packedValue) {
172+
while (nextQueryPoint != null) {
173+
int cmp = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, packedValue, 0);
174+
if (cmp == 0) {
175+
return true;
176+
} else if (cmp < 0) {
177+
// Query point is before index point, so we move to next query point
178+
try {
179+
nextQueryPoint = iterator.next();
180+
} catch (IOException e) {
181+
throw new RuntimeException(e);
182+
}
183+
} else {
184+
// Query point is after index point, so we don't collect and we return:
185+
break;
186+
}
187+
}
188+
return false;
189+
}
190+
191+
@Override
192+
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
193+
while (nextQueryPoint != null) {
194+
int cmpMin =
195+
comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, minPackedValue, 0);
196+
if (cmpMin < 0) {
197+
// query point is before the start of this cell
198+
try {
199+
nextQueryPoint = iterator.next();
200+
} catch (IOException e) {
201+
throw new RuntimeException(e);
202+
}
203+
continue;
204+
}
205+
int cmpMax =
206+
comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, maxPackedValue, 0);
207+
if (cmpMax > 0) {
208+
// query point is after the end of this cell
209+
return PointValues.Relation.CELL_OUTSIDE_QUERY;
210+
}
211+
212+
if (cmpMin == 0 && cmpMax == 0) {
213+
// NOTE: we only hit this if we are on a cell whose min and max values are exactly equal
214+
// to our point,
215+
// which can easily happen if many (> 512) docs share this one value
216+
return PointValues.Relation.CELL_INSIDE_QUERY;
217+
} else {
218+
return PointValues.Relation.CELL_CROSSES_QUERY;
219+
}
220+
}
221+
222+
// We exhausted all points in the query:
223+
return PointValues.Relation.CELL_OUTSIDE_QUERY;
224+
}
225+
}
226+
227+
@Override
228+
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
229+
if (bitmap.isEmpty()) {
230+
return new MatchNoDocsQuery();
231+
}
232+
return super.rewrite(indexSearcher);
233+
}
234+
235+
@Override
236+
public String toString(String field) {
237+
return "BitmapIndexQuery(field=" + field + ")";
238+
}
239+
240+
@Override
241+
public void visit(QueryVisitor visitor) {
242+
if (visitor.acceptField(field)) {
243+
visitor.visitLeaf(this);
244+
}
245+
}
246+
247+
@Override
248+
public boolean equals(Object other) {
249+
if (sameClassAs(other) == false) {
250+
return false;
251+
}
252+
BitmapIndexQuery that = (BitmapIndexQuery) other;
253+
return field.equals(that.field) && bitmap.equals(that.bitmap);
254+
}
255+
256+
@Override
257+
public int hashCode() {
258+
return Objects.hash(classHash(), field, bitmap);
259+
}
260+
261+
@Override
262+
public long ramBytesUsed() {
263+
return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOfObject(field)
264+
+ RamUsageEstimator.sizeOfObject(bitmap);
265+
}
266+
}

server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java

+4-30
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
import org.roaringbitmap.RoaringBitmap;
3636

37+
import static org.opensearch.search.query.BitmapIndexQueryTests.getMatchingValues;
38+
3739
public class BitmapDocValuesQueryTests extends OpenSearchTestCase {
3840
private Directory dir;
3941
private IndexWriter w;
@@ -81,21 +83,7 @@ public void testScore() throws IOException {
8183

8284
Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
8385

84-
List<Integer> actual = new LinkedList<>();
85-
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
86-
// use doc values to get the actual value of the matching docs and assert
87-
// cannot directly check the docId because test can randomize segment numbers
88-
SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id");
89-
Scorer scorer = weight.scorer(leaf);
90-
DocIdSetIterator disi = scorer.iterator();
91-
int docId;
92-
while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
93-
dv.advanceExact(docId);
94-
for (int count = 0; count < dv.docValueCount(); ++count) {
95-
actual.add((int) dv.nextValue());
96-
}
97-
}
98-
}
86+
List<Integer> actual = getMatchingValues(weight, searcher.getIndexReader());
9987
List<Integer> expected = List.of(1, 4);
10088
assertEquals(expected, actual);
10189
}
@@ -128,21 +116,7 @@ public void testScoreMutilValues() throws IOException {
128116

129117
Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
130118

131-
Set<Integer> actual = new HashSet<>();
132-
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
133-
// use doc values to get the actual value of the matching docs and assert
134-
// cannot directly check the docId because test can randomize segment numbers
135-
SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id");
136-
Scorer scorer = weight.scorer(leaf);
137-
DocIdSetIterator disi = scorer.iterator();
138-
int docId;
139-
while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
140-
dv.advanceExact(docId);
141-
for (int count = 0; count < dv.docValueCount(); ++count) {
142-
actual.add((int) dv.nextValue());
143-
}
144-
}
145-
}
119+
Set<Integer> actual = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader()));
146120
Set<Integer> expected = Set.of(2, 3);
147121
assertEquals(expected, actual);
148122
}

0 commit comments

Comments
 (0)