Skip to content

Commit f5ebc1a

Browse files
committed
Updated FilterIdsSelector: describe how the different FilterIdsSelectorType values are selected
Signed-off-by: luyuncheng <luyuncheng@bytedance.com>
1 parent 3970f98 commit f5ebc1a

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

src/main/java/org/opensearch/knn/index/query/FilterIdsSelector.java

+6-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import org.apache.lucene.util.BitSet;
1818
import org.apache.lucene.util.BitSetIterator;
1919
import org.apache.lucene.util.FixedBitSet;
20-
import org.apache.lucene.util.RamUsageEstimator;
2120

2221
import java.io.IOException;
2322

@@ -44,8 +43,6 @@ public enum FilterIdsSelectorType {
4443
long[] filterIds;
4544
private FilterIdsSelectorType filterType;
4645

47-
private static final long SINGLE_ELEMENT_ARRAY_BYTES_USED = RamUsageEstimator.sizeOf(new long[1]);
48-
4946
/**
5047
* This function takes a call on what ID Selector to use:
5148
* https://github.com/facebookresearch/faiss/wiki/Setting-search-parameters-for-one-query#idselectorarray-idselectorbatch-and-idselectorbitmap
@@ -69,6 +66,11 @@ public enum FilterIdsSelectorType {
6966
* So iterating over 117k ids in a single pass is also time consuming. For now, we therefore consider only size
7067
* as the deciding factor. We need to improve on this.
7168
*
69+
* Array Memory: Cardinality * Long.BYTES
70+
* BitSet Memory: MaxId / Byte.SIZE (one bit per id; equals MaxId / Long.BYTES only because both constants are 8)
71+
* When Array Memory <= BitSet Memory return FilterIdsSelectorType.BATCH
72+
* Else return FilterIdsSelectorType.BITMAP;
73+
*
7274
* @param filterIdsBitSet Filter query result docs
7375
* @param cardinality The number of bits that are set
7476
* @return {@link FilterIdsSelector}
@@ -82,7 +84,7 @@ public static FilterIdsSelector getFilterIdSelector(final BitSet filterIdsBitSet
8284
*/
8385
filterIds = ((FixedBitSet) filterIdsBitSet).getBits();
8486
filterType = FilterIdsSelector.FilterIdsSelectorType.BITMAP;
85-
} else if ((cardinality * SINGLE_ELEMENT_ARRAY_BYTES_USED * 8) <= filterIdsBitSet.length()) {
87+
} else if ((cardinality * Long.BYTES * Long.BYTES) <= filterIdsBitSet.length()) {
8688
/**
8789
* When filterIds is a sparse bitset, use RAM usage to decide the FilterIdsSelectorType
8890
*/

0 commit comments

Comments
 (0)