Skip to content

Commit 0697100

Browse files
committed
Use iterator that can advance, add random value unit test
Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com>
1 parent cc6b201 commit 0697100

File tree

4 files changed

+76
-39
lines changed

4 files changed

+76
-39
lines changed

server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
import org.opensearch.search.DocValueFormat;
8181
import org.opensearch.search.lookup.SearchLookup;
8282
import org.opensearch.search.query.BitmapDocValuesQuery;
83+
import org.opensearch.search.query.BitmapIndexQuery;
8384

8485
import java.io.IOException;
8586
import java.math.BigInteger;
@@ -97,7 +98,6 @@
9798
import java.util.function.Function;
9899
import java.util.function.Supplier;
99100

100-
import org.opensearch.search.query.BitmapIndexQuery;
101101
import org.roaringbitmap.RoaringBitmap;
102102

103103
/**
@@ -1555,6 +1555,7 @@ public Scorer get(long leadCost) throws IOException {
15551555
final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]);
15561556
Query query = new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() {
15571557
final Iterator<Integer> iterator = bitmap.iterator();
1558+
15581559
@Override
15591560
public BytesRef next() {
15601561
int value;

server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java

+30-28
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,13 @@
2929
import org.apache.lucene.util.BytesRefIterator;
3030
import org.apache.lucene.util.DocIdSetBuilder;
3131
import org.apache.lucene.util.RamUsageEstimator;
32-
import org.roaringbitmap.RoaringBitmap;
3332

3433
import java.io.IOException;
35-
import java.util.Iterator;
3634
import java.util.Objects;
3735

36+
import org.roaringbitmap.PeekableIntIterator;
37+
import org.roaringbitmap.RoaringBitmap;
38+
3839
/**
3940
* A query that matches all documents that contain a set of integer numbers represented by bitmap
4041
*
@@ -50,12 +51,19 @@ public BitmapIndexQuery(String field, RoaringBitmap bitmap) {
5051
this.field = field;
5152
}
5253

53-
private static BytesRefIterator bitmapEncodedIterator(RoaringBitmap bitmap) {
54-
return new BytesRefIterator() {
55-
private final Iterator<Integer> iterator = bitmap.iterator();
54+
interface BitmapIterator extends BytesRefIterator {
55+
// wrap IntIterator.next()
56+
BytesRef next();
57+
58+
// expose PeekableIntIterator.advanceIfNeeded, advance as long as the next value is smaller than target
59+
void advance(byte[] target);
60+
}
61+
62+
private static BitmapIterator bitmapEncodedIterator(RoaringBitmap bitmap) {
63+
return new BitmapIterator() {
64+
private final PeekableIntIterator iterator = bitmap.getIntIterator();
5665
private final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]);
5766

58-
@Override
5967
public BytesRef next() {
6068
int value;
6169
if (iterator.hasNext()) {
@@ -66,6 +74,10 @@ public BytesRef next() {
6674
IntPoint.encodeDimension(value, encoded.bytes, 0);
6775
return encoded;
6876
}
77+
78+
public void advance(byte[] target) {
79+
iterator.advanceIfNeeded(IntPoint.decodeDimension(target, 0));
80+
}
6981
};
7082
}
7183

@@ -85,8 +97,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
8597
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
8698
final Weight weight = this;
8799
LeafReader reader = context.reader();
88-
// get point value
89-
// only works for one dimension
100+
// get the point value which should be one dimension, since bitmap saves integers
90101
PointValues values = reader.getPointValues(field);
91102
if (values == null) {
92103
return null;
@@ -118,21 +129,20 @@ public long cost() {
118129

119130
@Override
120131
public boolean isCacheable(LeafReaderContext ctx) {
121-
// This query depend only on segment-immutable structure points
132+
// This query depend only on segment-immutable structure points
122133
return true;
123134
}
124135
};
125136
}
126137

127138
private class MergePointVisitor implements PointValues.IntersectVisitor {
128139
private final DocIdSetBuilder result;
129-
private final BytesRefIterator iterator;
140+
private final BitmapIterator iterator;
130141
private BytesRef nextQueryPoint;
131142
private final ArrayUtil.ByteArrayComparator comparator;
132143
private DocIdSetBuilder.BulkAdder adder;
133144

134-
public MergePointVisitor(DocIdSetBuilder result)
135-
throws IOException {
145+
public MergePointVisitor(DocIdSetBuilder result) throws IOException {
136146
this.result = result;
137147
this.comparator = ArrayUtil.getUnsignedComparator(Integer.BYTES);
138148
this.iterator = bitmapEncodedIterator(bitmap);
@@ -175,11 +185,8 @@ private boolean matches(byte[] packedValue) {
175185
return true;
176186
} else if (cmp < 0) {
177187
// Query point is before index point, so we move to next query point
178-
try {
179-
nextQueryPoint = iterator.next();
180-
} catch (IOException e) {
181-
throw new RuntimeException(e);
182-
}
188+
iterator.advance(packedValue);
189+
nextQueryPoint = iterator.next();
183190
} else {
184191
// Query point is after index point, so we don't collect and we return:
185192
break;
@@ -191,19 +198,14 @@ private boolean matches(byte[] packedValue) {
191198
@Override
192199
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
193200
while (nextQueryPoint != null) {
194-
int cmpMin =
195-
comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, minPackedValue, 0);
201+
int cmpMin = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, minPackedValue, 0);
196202
if (cmpMin < 0) {
197203
// query point is before the start of this cell
198-
try {
199-
nextQueryPoint = iterator.next();
200-
} catch (IOException e) {
201-
throw new RuntimeException(e);
202-
}
204+
iterator.advance(minPackedValue);
205+
nextQueryPoint = iterator.next();
203206
continue;
204207
}
205-
int cmpMax =
206-
comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, maxPackedValue, 0);
208+
int cmpMax = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, maxPackedValue, 0);
207209
if (cmpMax > 0) {
208210
// query point is after the end of this cell
209211
return PointValues.Relation.CELL_OUTSIDE_QUERY;
@@ -260,7 +262,7 @@ public int hashCode() {
260262

261263
@Override
262264
public long ramBytesUsed() {
263-
return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOfObject(field)
264-
+ RamUsageEstimator.sizeOfObject(bitmap);
265+
return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOfObject(field) + RamUsageEstimator
266+
.sizeOfObject(bitmap);
265267
}
266268
}

server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java

-6
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,9 @@
1212
import org.apache.lucene.document.Field;
1313
import org.apache.lucene.document.IntField;
1414
import org.apache.lucene.index.DirectoryReader;
15-
import org.apache.lucene.index.DocValues;
1615
import org.apache.lucene.index.IndexWriter;
17-
import org.apache.lucene.index.LeafReaderContext;
18-
import org.apache.lucene.index.SortedNumericDocValues;
19-
import org.apache.lucene.search.DocIdSetIterator;
2016
import org.apache.lucene.search.IndexSearcher;
2117
import org.apache.lucene.search.ScoreMode;
22-
import org.apache.lucene.search.Scorer;
2318
import org.apache.lucene.search.Weight;
2419
import org.apache.lucene.store.Directory;
2520
import org.opensearch.test.OpenSearchTestCase;
@@ -28,7 +23,6 @@
2823

2924
import java.io.IOException;
3025
import java.util.HashSet;
31-
import java.util.LinkedList;
3226
import java.util.List;
3327
import java.util.Set;
3428

server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java

+44-4
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,21 @@
2323
import org.apache.lucene.search.Scorer;
2424
import org.apache.lucene.search.Weight;
2525
import org.apache.lucene.store.Directory;
26+
import org.opensearch.test.OpenSearchTestCase;
2627
import org.junit.After;
2728
import org.junit.Before;
28-
import org.opensearch.test.OpenSearchTestCase;
29-
import org.roaringbitmap.RoaringBitmap;
3029

3130
import java.io.IOException;
31+
import java.util.ArrayList;
32+
import java.util.Collections;
3233
import java.util.HashSet;
3334
import java.util.LinkedList;
3435
import java.util.List;
36+
import java.util.Random;
3537
import java.util.Set;
3638

39+
import org.roaringbitmap.RoaringBitmap;
40+
3741
public class BitmapIndexQueryTests extends OpenSearchTestCase {
3842
private Directory dir;
3943
private IndexWriter w;
@@ -86,11 +90,11 @@ public void testScore() throws IOException {
8690
assertEquals(expected, actual);
8791
}
8892

93+
// use doc values to get the actual value of the matching docs
94+
// cannot directly check the docId because test can randomize segment numbers
8995
static List<Integer> getMatchingValues(Weight weight, IndexReader reader) throws IOException {
9096
List<Integer> actual = new LinkedList<>();
9197
for (LeafReaderContext leaf : reader.leaves()) {
92-
// use doc values to get the actual value of the matching docs and assert
93-
// cannot directly check the docId because test can randomize segment numbers
9498
SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id");
9599
Scorer scorer = weight.scorer(leaf);
96100
DocIdSetIterator disi = scorer.iterator();
@@ -138,4 +142,40 @@ public void testScoreMutilValues() throws IOException {
138142
assertEquals(expected, actual);
139143
}
140144

145+
public void testRandomDocumentsAndQueries() throws IOException {
146+
Random random = new Random();
147+
int valueRange = 10_000; // the range of query values should be within indexed values
148+
149+
for (int i = 0; i < valueRange + 1; i++) {
150+
Document d = new Document();
151+
d.add(new IntField("product_id", i, Field.Store.NO));
152+
w.addDocument(d);
153+
}
154+
155+
w.commit();
156+
reader = DirectoryReader.open(w);
157+
searcher = newSearcher(reader);
158+
159+
// Generate random values for bitmap query
160+
Set<Integer> queryValues = new HashSet<>();
161+
int numberOfValues = 5;
162+
for (int i = 0; i < numberOfValues; i++) {
163+
int value = random.nextInt(valueRange) + 1;
164+
queryValues.add(value);
165+
}
166+
RoaringBitmap bitmap = new RoaringBitmap();
167+
bitmap.add(queryValues.stream().mapToInt(Integer::intValue).toArray());
168+
169+
BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap);
170+
Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
171+
172+
Set<Integer> actualSet = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader()));
173+
174+
List<Integer> expected = new ArrayList<>(queryValues);
175+
Collections.sort(expected);
176+
List<Integer> actual = new ArrayList<>(actualSet);
177+
Collections.sort(actual);
178+
assertEquals(expected, actual);
179+
}
180+
141181
}

0 commit comments

Comments
 (0)