17
17
import org .apache .lucene .util .BitSet ;
18
18
import org .apache .lucene .util .BitSetIterator ;
19
19
import org .apache .lucene .util .FixedBitSet ;
20
- import org .apache .lucene .util .RamUsageEstimator ;
21
20
22
21
import java .io .IOException ;
23
22
@@ -44,8 +43,6 @@ public enum FilterIdsSelectorType {
44
43
long [] filterIds ;
45
44
private FilterIdsSelectorType filterType ;
46
45
47
- private static final long SINGLE_ELEMENT_ARRAY_BYTES_USED = RamUsageEstimator .sizeOf (new long [1 ]);
48
-
49
46
/**
50
47
* This function decides which ID Selector to use:
51
48
* https://github.com/facebookresearch/faiss/wiki/Setting-search-parameters-for-one-query#idselectorarray-idselectorbatch-and-idselectorbitmap
@@ -69,6 +66,11 @@ public enum FilterIdsSelectorType {
69
66
* So iterating over 117k ids in a single pass is also time consuming. For now, we consider only size
70
67
* as the deciding factor. We need to improve on this.
71
68
*
69
+ * Array Memory: Cardinality * Long.BYTES
70
+ * BitSet Memory: MaxId / Byte.SIZE (bits to bytes; same numeric value as Long.BYTES)
71
+ * When Array Memory <= BitSet Memory return FilterIdsSelectorType.BATCH
72
+ * Else return FilterIdsSelectorType.BITMAP;
73
+ *
72
74
* @param filterIdsBitSet Filter query result docs
73
75
* @param cardinality The number of bits that are set
74
76
* @return {@link FilterIdsSelector}
@@ -82,7 +84,7 @@ public static FilterIdsSelector getFilterIdSelector(final BitSet filterIdsBitSet
82
84
*/
83
85
filterIds = ((FixedBitSet ) filterIdsBitSet ).getBits ();
84
86
filterType = FilterIdsSelector .FilterIdsSelectorType .BITMAP ;
85
- } else if ((cardinality * SINGLE_ELEMENT_ARRAY_BYTES_USED * 8 ) <= filterIdsBitSet .length ()) {
87
+ } else if ((cardinality * Long . BYTES * Long . BYTES ) <= filterIdsBitSet .length ()) {
86
88
/**
87
89
* When filterIds is a sparse bitset, use RAM usage to decide the FilterIdsSelectorType
88
90
*/
0 commit comments