Skip to content

Commit f5dbbb0

Browse files
authored
Fix aggs result of NestedAggregator with sub NestedAggregator (#13324)
Signed-off-by: kkewwei <kkewwei@163.com>
1 parent bcccedb commit f5dbbb0

File tree

4 files changed

+399
-34
lines changed

4 files changed

+399
-34
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3939
- Fix the computed max shards of cluster to avoid int overflow ([#14155](https://github.com/opensearch-project/OpenSearch/pull/14155))
4040
- Fixed rest-high-level client searchTemplate & mtermVectors endpoints to have a leading slash ([#14465](https://github.com/opensearch-project/OpenSearch/pull/14465))
4141
- Write shard level metadata blob when snapshotting searchable snapshot indexes ([#13190](https://github.com/opensearch-project/OpenSearch/pull/13190))
42+
- Fix aggs result of NestedAggregator with sub NestedAggregator ([#13324](https://github.com/opensearch-project/OpenSearch/pull/13324))
4243

4344
### Security
4445

server/src/main/java/org/opensearch/search/aggregations/bucket/nested/NestedAggregator.java

+65-19
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@
4343
import org.apache.lucene.search.Weight;
4444
import org.apache.lucene.search.join.BitSetProducer;
4545
import org.apache.lucene.util.BitSet;
46+
import org.opensearch.common.collect.Tuple;
4647
import org.opensearch.common.lucene.search.Queries;
4748
import org.opensearch.core.ParseField;
49+
import org.opensearch.index.mapper.MapperService;
4850
import org.opensearch.index.mapper.ObjectMapper;
4951
import org.opensearch.search.aggregations.Aggregator;
5052
import org.opensearch.search.aggregations.AggregatorFactories;
@@ -88,12 +90,25 @@ public class NestedAggregator extends BucketsAggregator implements SingleBucketA
8890
) throws IOException {
8991
super(name, factories, context, parent, cardinality, metadata);
9092

91-
Query parentFilter = parentObjectMapper != null ? parentObjectMapper.nestedTypeFilter() : Queries.newNonNestedFilter();
93+
Query parentFilter = isParent(parentObjectMapper, childObjectMapper, context.mapperService())
94+
? parentObjectMapper.nestedTypeFilter()
95+
: Queries.newNonNestedFilter();
9296
this.parentFilter = context.bitsetFilterCache().getBitSetProducer(parentFilter);
9397
this.childFilter = childObjectMapper.nestedTypeFilter();
9498
this.collectsFromSingleBucket = cardinality.map(estimate -> estimate < 2);
9599
}
96100

101+
private boolean isParent(ObjectMapper parentObjectMapper, ObjectMapper childObjectMapper, MapperService mapperService) {
102+
if (parentObjectMapper == null) {
103+
return false;
104+
}
105+
ObjectMapper parent;
106+
do {
107+
parent = childObjectMapper.getParentObjectMapper(mapperService);
108+
} while (parent != null && parent != parentObjectMapper);
109+
return parentObjectMapper == parent;
110+
}
111+
97112
@Override
98113
public LeafBucketCollector getLeafCollector(final LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
99114
IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(ctx);
@@ -107,20 +122,17 @@ public LeafBucketCollector getLeafCollector(final LeafReaderContext ctx, final L
107122
if (collectsFromSingleBucket) {
108123
return new LeafBucketCollectorBase(sub, null) {
109124
@Override
110-
public void collect(int parentDoc, long bucket) throws IOException {
111-
// if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these appear always before the parent
112-
// doc), so we can skip:
113-
if (parentDoc == 0 || parentDocs == null || childDocs == null) {
125+
public void collect(int parentAggDoc, long bucket) throws IOException {
126+
// parentAggDoc can be 0 when aggregation:
127+
if (parentDocs == null || childDocs == null) {
114128
return;
115129
}
116130

117-
final int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
118-
int childDocId = childDocs.docID();
119-
if (childDocId <= prevParentDoc) {
120-
childDocId = childDocs.advance(prevParentDoc + 1);
121-
}
131+
Tuple<Integer, Integer> res = getParentAndChildId(parentDocs, childDocs, parentAggDoc);
132+
int currentParentDoc = res.v1();
133+
int childDocId = res.v2();
122134

123-
for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
135+
for (; childDocId < currentParentDoc; childDocId = childDocs.nextDoc()) {
124136
collectBucket(sub, childDocId, bucket);
125137
}
126138
}
@@ -130,6 +142,43 @@ public void collect(int parentDoc, long bucket) throws IOException {
130142
}
131143
}
132144

145+
/**
146+
* In one case, it's talking about the parent doc (from the Lucene block-join standpoint),
147+
* while in the other case, it's talking about a child doc ID (from the block-join standpoint)
148+
* from the parent aggregation, where we're trying to aggregate over a sibling of that child.
149+
* So, we need to map from that document to its parent, then join to the appropriate sibling.
150+
*
151+
* @param parentAggDoc the parent aggregation's current doc
152+
* (which may or may not be a block-level parent doc)
153+
* @return a tuple consisting of the current block-level parent doc (the parent of the
154+
* parameter doc), and the next matching child doc (hopefully under this parent)
155+
* for the aggregation (according to the child doc iterator).
156+
*/
157+
static Tuple<Integer, Integer> getParentAndChildId(BitSet parentDocs, DocIdSetIterator childDocs, int parentAggDoc) throws IOException {
158+
int currentParentAggDoc;
159+
int prevParentDoc = parentDocs.prevSetBit(parentAggDoc);
160+
if (prevParentDoc == -1) {
161+
currentParentAggDoc = parentDocs.nextSetBit(0);
162+
} else if (prevParentDoc == parentAggDoc) {
163+
// parentAggDoc is the parent of that child, and is belongs to parentDocs
164+
currentParentAggDoc = parentAggDoc;
165+
if (currentParentAggDoc == 0) {
166+
prevParentDoc = -1;
167+
} else {
168+
prevParentDoc = parentDocs.prevSetBit(currentParentAggDoc - 1);
169+
}
170+
} else {
171+
// parentAggDoc is the sibling of that child, and it means the block-join parent
172+
currentParentAggDoc = parentDocs.nextSetBit(prevParentDoc + 1);
173+
}
174+
175+
int childDocId = childDocs.docID();
176+
if (childDocId <= prevParentDoc) {
177+
childDocId = childDocs.advance(prevParentDoc + 1);
178+
}
179+
return Tuple.tuple(currentParentAggDoc, childDocId);
180+
}
181+
133182
@Override
134183
protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException {
135184
super.preGetSubLeafCollectors(ctx);
@@ -191,9 +240,8 @@ public void setScorer(Scorable scorer) throws IOException {
191240

192241
@Override
193242
public void collect(int parentDoc, long bucket) throws IOException {
194-
// if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these appear always before the parent
195-
// doc), so we can skip:
196-
if (parentDoc == 0 || parentDocs == null || childDocs == null) {
243+
// parentAggDoc can be 0 when aggregation:
244+
if (parentDocs == null || childDocs == null) {
197245
return;
198246
}
199247

@@ -214,11 +262,9 @@ void processBufferedChildBuckets() throws IOException {
214262
return;
215263
}
216264

217-
final int prevParentDoc = parentDocs.prevSetBit(currentParentDoc - 1);
218-
int childDocId = childDocs.docID();
219-
if (childDocId <= prevParentDoc) {
220-
childDocId = childDocs.advance(prevParentDoc + 1);
221-
}
265+
Tuple<Integer, Integer> res = getParentAndChildId(parentDocs, childDocs, currentParentDoc);
266+
int currentParentDoc = res.v1();
267+
int childDocId = res.v2();
222268

223269
for (; childDocId < currentParentDoc; childDocId = childDocs.nextDoc()) {
224270
cachedScorer.doc = childDocId;

0 commit comments

Comments
 (0)