Skip to content

Commit 52ecbe9

Browse files
Support Filtering on Large List encoded by Bitmap (opensearch-project#14774)
--------- Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> Signed-off-by: Michael Froh <froh@amazon.com> Co-authored-by: Michael Froh <froh@amazon.com>
1 parent adf3660 commit 52ecbe9

File tree

15 files changed

+1028
-14
lines changed

15 files changed

+1028
-14
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1919
- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054))
2020
- [Workload Management] Add Get QueryGroup API Logic ([#14709](https://github.com/opensearch-project/OpenSearch/pull/14709))
2121
- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897))
22+
- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774))
2223
- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153))
2324

2425
### Dependencies
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,184 @@
1+
---
2+
setup:
3+
- skip:
4+
version: " - 2.99.99"
5+
reason: The bitmap filtering feature is available in 2.17 and later.
6+
- do:
7+
indices.create:
8+
index: students
9+
body:
10+
settings:
11+
number_of_shards: 1
12+
number_of_replicas: 0
13+
mappings:
14+
properties:
15+
student_id:
16+
type: integer
17+
- do:
18+
bulk:
19+
refresh: true
20+
body:
21+
- { "index": { "_index": "students", "_id": "1" } }
22+
- { "name": "Jane Doe", "student_id": 111 }
23+
- { "index": { "_index": "students", "_id": "2" } }
24+
- { "name": "Mary Major", "student_id": 222 }
25+
- { "index": { "_index": "students", "_id": "3" } }
26+
- { "name": "John Doe", "student_id": 333 }
27+
- do:
28+
indices.create:
29+
index: classes
30+
body:
31+
settings:
32+
number_of_shards: 1
33+
number_of_replicas: 0
34+
mappings:
35+
properties:
36+
enrolled:
37+
type: binary
38+
store: true
39+
- do:
40+
bulk:
41+
refresh: true
42+
body:
43+
- { "index": { "_index": "classes", "_id": "101" } }
44+
- { "enrolled": "OjAAAAEAAAAAAAEAEAAAAG8A3gA=" } # 111,222
45+
- { "index": { "_index": "classes", "_id": "102" } }
46+
- { "enrolled": "OjAAAAEAAAAAAAAAEAAAAG8A" } # 111
47+
- { "index": { "_index": "classes", "_id": "103" } }
48+
- { "enrolled": "OjAAAAEAAAAAAAAAEAAAAE0B" } # 333
49+
- { "index": { "_index": "classes", "_id": "104" } }
50+
- { "enrolled": "OjAAAAEAAAAAAAEAEAAAAN4ATQE=" } # 222,333
51+
- do:
52+
cluster.health:
53+
wait_for_status: green
54+
55+
---
56+
"Terms lookup on a binary field with bitmap":
57+
- do:
58+
search:
59+
rest_total_hits_as_int: true
60+
index: students
61+
body: {
62+
"query": {
63+
"terms": {
64+
"student_id": {
65+
"index": "classes",
66+
"id": "101",
67+
"path": "enrolled",
68+
"store": true
69+
},
70+
"value_type": "bitmap"
71+
}
72+
}
73+
}
74+
- match: { hits.total: 2 }
75+
- match: { hits.hits.0._source.name: Jane Doe }
76+
- match: { hits.hits.0._source.student_id: 111 }
77+
- match: { hits.hits.1._source.name: Mary Major }
78+
- match: { hits.hits.1._source.student_id: 222 }
79+
80+
---
81+
"Terms query accepting bitmap as value":
82+
- do:
83+
search:
84+
rest_total_hits_as_int: true
85+
index: students
86+
body: {
87+
"query": {
88+
"terms": {
89+
"student_id": ["OjAAAAEAAAAAAAEAEAAAAG8A3gA="],
90+
"value_type": "bitmap"
91+
}
92+
}
93+
}
94+
- match: { hits.total: 2 }
95+
- match: { hits.hits.0._source.name: Jane Doe }
96+
- match: { hits.hits.0._source.student_id: 111 }
97+
- match: { hits.hits.1._source.name: Mary Major }
98+
- match: { hits.hits.1._source.student_id: 222 }
99+
100+
---
101+
"Boolean must bitmap filtering":
102+
- do:
103+
search:
104+
rest_total_hits_as_int: true
105+
index: students
106+
body: {
107+
"query": {
108+
"bool": {
109+
"must": [
110+
{
111+
"terms": {
112+
"student_id": {
113+
"index": "classes",
114+
"id": "101",
115+
"path": "enrolled",
116+
"store": true
117+
},
118+
"value_type": "bitmap"
119+
}
120+
}
121+
],
122+
"must_not": [
123+
{
124+
"terms": {
125+
"student_id": {
126+
"index": "classes",
127+
"id": "104",
128+
"path": "enrolled",
129+
"store": true
130+
},
131+
"value_type": "bitmap"
132+
}
133+
}
134+
]
135+
}
136+
}
137+
}
138+
- match: { hits.total: 1 }
139+
- match: { hits.hits.0._source.name: Jane Doe }
140+
- match: { hits.hits.0._source.student_id: 111 }
141+
142+
---
143+
"Boolean should bitmap filtering":
144+
- do:
145+
search:
146+
rest_total_hits_as_int: true
147+
index: students
148+
body: {
149+
"query": {
150+
"bool": {
151+
"should": [
152+
{
153+
"terms": {
154+
"student_id": {
155+
"index": "classes",
156+
"id": "101",
157+
"path": "enrolled",
158+
"store": true
159+
},
160+
"value_type": "bitmap"
161+
}
162+
},
163+
{
164+
"terms": {
165+
"student_id": {
166+
"index": "classes",
167+
"id": "104",
168+
"path": "enrolled",
169+
"store": true
170+
},
171+
"value_type": "bitmap"
172+
}
173+
}
174+
]
175+
}
176+
}
177+
}
178+
- match: { hits.total: 3 }
179+
- match: { hits.hits.0._source.name: Mary Major }
180+
- match: { hits.hits.0._source.student_id: 222 }
181+
- match: { hits.hits.1._source.name: Jane Doe }
182+
- match: { hits.hits.1._source.student_id: 111 }
183+
- match: { hits.hits.2._source.name: John Doe }
184+
- match: { hits.hits.2._source.student_id: 333 }

server/build.gradle

+3
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,9 @@ dependencies {
126126
api "com.google.protobuf:protobuf-java:${versions.protobuf}"
127127
api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}"
128128

129+
// https://mvnrepository.com/artifact/org.roaringbitmap/RoaringBitmap
130+
implementation 'org.roaringbitmap:RoaringBitmap:1.1.0'
131+
129132
testImplementation(project(":test:framework")) {
130133
// tests use the locally compiled version of server
131134
exclude group: 'org.opensearch', module: 'server'
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
9607213861158ae7060234d93ee9c9cb19f494d1

0 commit comments

Comments
 (0)