Skip to content

Commit a845d85

Browse files
committed
Fix missing bucket in terms aggregation with missing value
Signed-off-by: kkewwei <kewei.11@bytedance.com>
Signed-off-by: kkewwei <kkewwei@163.com>
1 parent 664f254 commit a845d85

File tree

3 files changed: 96 additions (+96) and 1 deletion (-1).

CHANGELOG-3.0.md

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
7070
- Don't over-allocate in HeapBufferedAsyncEntityConsumer in order to consume the response ([#9993](https://github.com/opensearch-project/OpenSearch/pull/9993))
7171
- Fix swapped field formats in nodes API where `total_indexing_buffer_in_bytes` and `total_indexing_buffer` values were reversed ([#17070](https://github.com/opensearch-project/OpenSearch/pull/17070))
7272
- Add HTTP/2 protocol support to HttpRequest.HttpVersion ([#17248](https://github.com/opensearch-project/OpenSearch/pull/17248))
73+
- Fix missing bucket in terms aggregation with missing value ([#17418](https://github.com/opensearch-project/OpenSearch/pull/17418))
7374

7475
### Security
7576

server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ public long getValueCount() {
359359

360360
@Override
361361
public int docValueCount() {
362-
return values.docValueCount();
362+
// Never report fewer than one value, even when the wrapped values report zero.
// NOTE(review): presumably this lets a document without a value for the field
// still be visited once so the configured missing value can be returned for it
// (see the commit title) -- confirm against the enclosing class.
return Math.max(1, values.docValueCount());
363363
}
364364

365365
@Override

server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java

+94
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.lucene.document.BinaryDocValuesField;
3535
import org.apache.lucene.document.Document;
3636
import org.apache.lucene.document.Field;
37+
import org.apache.lucene.document.FieldType;
3738
import org.apache.lucene.document.InetAddressPoint;
3839
import org.apache.lucene.document.LatLonDocValuesField;
3940
import org.apache.lucene.document.NumericDocValuesField;
@@ -42,6 +43,8 @@
4243
import org.apache.lucene.document.SortedSetDocValuesField;
4344
import org.apache.lucene.document.StringField;
4445
import org.apache.lucene.index.DirectoryReader;
46+
import org.apache.lucene.index.DocValuesType;
47+
import org.apache.lucene.index.IndexOptions;
4548
import org.apache.lucene.index.IndexReader;
4649
import org.apache.lucene.index.IndexableField;
4750
import org.apache.lucene.index.NoMergePolicy;
@@ -75,6 +78,8 @@
7578
import org.opensearch.index.mapper.RangeFieldMapper;
7679
import org.opensearch.index.mapper.RangeType;
7780
import org.opensearch.index.mapper.SeqNoFieldMapper;
81+
import org.opensearch.index.mapper.TextFieldMapper;
82+
import org.opensearch.index.mapper.TextParams;
7883
import org.opensearch.index.mapper.Uid;
7984
import org.opensearch.index.query.MatchAllQueryBuilder;
8085
import org.opensearch.index.query.QueryBuilders;
@@ -1578,6 +1583,95 @@ public void testOrderByPipelineAggregation() throws Exception {
15781583
}
15791584
}
15801585

1586+
/**
 * Checks that a terms aggregation configured with a {@code missing} value returns a bucket
 * for the documents that have no value for the aggregated field.
 * <p>
 * Two scenarios run against the same index writer: a text field with fielddata enabled,
 * then a keyword field indexed as sorted-set doc values. In each scenario one document
 * holds two values for the aggregated field and two documents only carry a different field,
 * so the expected result is three buckets: the missing-value bucket with a doc count of 2
 * and one bucket with a doc count of 1 per indexed term.
 *
 * @throws IOException declared because the directory, writer and reader operations may fail
 */
public void testBucketInTermsAggregationWithMissingValue() throws IOException {
1587+
try (Directory directory = newDirectory()) {
1588+
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
1589+
// Scenario 1: text field ("mv_field") with fielddata enabled.
1590+
{
1591+
FieldType type = TextParams.buildFieldType(() -> true, () -> false, () -> "positions", () -> false, () -> "no");
1592+
// First document: two values for the aggregated field "mv_field".
Document document = new Document();
1593+
document.add(new Field("mv_field", "name1", type));
1594+
document.add(new Field("mv_field", "name2", type));
1595+
indexWriter.addDocument(document);
1596+
// Second and third documents only carry "mv_field1", i.e. no value for "mv_field".
document = new Document();
1597+
document.add(new Field("mv_field1", "value1", type));
1598+
indexWriter.addDocument(document);
1599+
document = new Document();
1600+
document.add(new Field("mv_field1", "value2", type));
1601+
indexWriter.addDocument(document);
1602+
indexWriter.flush();
1603+
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
1604+
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
1605+
TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType("mv_field");
1606+
fieldType.setFielddata(true);
1607+
1608+
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("nick").userValueTypeHint(ValueType.STRING)
1609+
.field("mv_field")
1610+
.missing("no_nickname");
1611+
TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
1612+
// The scenario is only meaningful for the global-ordinals string implementation.
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
1613+
1614+
aggregator.preCollection();
1615+
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
1616+
aggregator.postCollection();
1617+
Terms result = reduce(aggregator);
1618+
// Missing-value bucket first (the two documents without "mv_field"), then one bucket per term.
assertEquals(3, result.getBuckets().size());
1619+
assertEquals("no_nickname", result.getBuckets().get(0).getKeyAsString());
1620+
assertEquals(2L, result.getBuckets().get(0).getDocCount());
1621+
assertEquals("name1", result.getBuckets().get(1).getKeyAsString());
1622+
assertEquals(1L, result.getBuckets().get(1).getDocCount());
1623+
assertEquals("name2", result.getBuckets().get(2).getKeyAsString());
1624+
assertEquals(1L, result.getBuckets().get(2).getDocCount());
1625+
1626+
}
1627+
// Remove every document before the next scenario
// (presumably so the keyword scenario does not see the text documents -- confirm).
indexWriter.deleteAll();
1628+
}
1629+
1630+
// Scenario 2: keyword field ("mv_field1") indexed as sorted-set doc values.
1631+
{
1632+
// NOTE(review): `fieldtype` is configured here but never referenced again in this
// method; the documents below are built with SortedSetDocValuesField directly.
FieldType fieldtype = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE);
1633+
fieldtype.setDocValuesType(DocValuesType.SORTED_SET);
1634+
fieldtype.setIndexOptions(IndexOptions.NONE);
1635+
fieldtype.setStored(true);
1636+
1637+
// First document: two values for the aggregated field "mv_field1".
Document document = new Document();
1638+
document.add(new SortedSetDocValuesField("mv_field1", new BytesRef("name1")));
1639+
document.add(new SortedSetDocValuesField("mv_field1", new BytesRef("name2")));
1640+
indexWriter.addDocument(document);
1641+
// Second and third documents only carry "mv_field2", i.e. no value for "mv_field1".
document = new Document();
1642+
document.add(new SortedSetDocValuesField("mv_field2", new BytesRef("value1")));
1643+
indexWriter.addDocument(document);
1644+
document = new Document();
1645+
document.add(new SortedSetDocValuesField("mv_field2", new BytesRef("value2")));
1646+
indexWriter.addDocument(document);
1647+
indexWriter.flush();
1648+
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
1649+
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
1650+
KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("mv_field1");
1651+
1652+
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").userValueTypeHint(
1653+
ValueType.STRING
1654+
).field("mv_field1").missing("no_nickname1");
1655+
TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
1656+
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
1657+
1658+
aggregator.preCollection();
1659+
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
1660+
aggregator.postCollection();
1661+
Terms result = reduce(aggregator);
1662+
// Same expectation as the text scenario, with this scenario's missing key.
assertEquals(3, result.getBuckets().size());
1663+
assertEquals("no_nickname1", result.getBuckets().get(0).getKeyAsString());
1664+
assertEquals(2L, result.getBuckets().get(0).getDocCount());
1665+
assertEquals("name1", result.getBuckets().get(1).getKeyAsString());
1666+
assertEquals(1L, result.getBuckets().get(1).getDocCount());
1667+
assertEquals("name2", result.getBuckets().get(2).getKeyAsString());
1668+
assertEquals(1L, result.getBuckets().get(2).getDocCount());
1669+
}
1670+
}
1671+
}
1672+
}
1673+
}
1674+
15811675
private final SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
15821676

15831677
private List<Document> generateDocsWithNested(String id, int value, int[] nestedValues) {

0 commit comments

Comments
 (0)