Skip to content

Commit 57fb50b

Browse files
Apply the date histogram rewrite optimization to range aggregation (#13865)
* Refactor the ranges representation Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * Refactor try fast filter Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * Main work finished; left the handling of different numeric data types Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * buildRanges accepts field type Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * first working draft probably Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * add change log Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * accommodate geo distance agg Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * Fix test support all numeric types minus one on the upper range Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * [Refactor] range is lower inclusive, right exclusive Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * adding test Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * Adding test and refactor Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * refactor Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * add test Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * add test and update the compare logic in tree traversal Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * fix test, add random test Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * refactor to address comments Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * small potential performance update Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * fix precommit Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * refactor Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * refactor Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * set refresh_interval to -1 Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * address comment Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * address comment Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * address comment Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> * Fix test To understand fully about the 
double and bigdecimal usage in scaled float field will take more time. Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com> --------- Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com>
1 parent bd56456 commit 57fb50b

File tree

17 files changed

+902
-232
lines changed

17 files changed

+902
-232
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
77
### Added
88
- Add fingerprint ingest processor ([#13724](https://github.com/opensearch-project/OpenSearch/pull/13724))
99
- [Remote Store] Rate limiter for remote store low priority uploads ([#14374](https://github.com/opensearch-project/OpenSearch/pull/14374/))
10+
- Apply the date histogram rewrite optimization to range aggregation ([#13865](https://github.com/opensearch-project/OpenSearch/pull/13865))
1011

1112
### Dependencies
1213
- Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442))

modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java

+17-1
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
import com.fasterxml.jackson.core.JsonParseException;
3636

3737
import org.apache.lucene.document.Field;
38+
import org.apache.lucene.document.LongPoint;
3839
import org.apache.lucene.index.DocValues;
3940
import org.apache.lucene.index.LeafReaderContext;
4041
import org.apache.lucene.index.NumericDocValues;
@@ -165,7 +166,7 @@ public ScaledFloatFieldMapper build(BuilderContext context) {
165166

166167
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings()));
167168

168-
public static final class ScaledFloatFieldType extends SimpleMappedFieldType {
169+
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {
169170

170171
private final double scalingFactor;
171172
private final Double nullValue;
@@ -188,6 +189,21 @@ public ScaledFloatFieldType(String name, double scalingFactor) {
188189
this(name, true, false, true, Collections.emptyMap(), scalingFactor, null);
189190
}
190191

192+
/**
 * Encodes {@code value} into a {@code Long.BYTES}-long byte array using
 * {@code LongPoint.encodeDimension}.
 * <p>
 * Positive infinity is written as {@code Long.MAX_VALUE} and negative infinity as
 * {@code Long.MIN_VALUE}. Any other value is passed through {@code scale(...)} and the
 * result of {@code Math.round} on it is encoded.
 *
 * @param value the number to encode; asserted (and then cast) to {@link Double}
 * @return a newly allocated array with the encoded value written at offset 0
 */
@Override
193+
public byte[] encodePoint(Number value) {
194+
// Only checked when assertions are enabled (-ea). With assertions off, the cast on the
// next line throws ClassCastException for any Number that is not a Double.
assert value instanceof Double;
195+
double doubleValue = (Double) value;
196+
byte[] point = new byte[Long.BYTES];
197+
// Infinities are mapped directly to the long extremes and never reach scale().
// NOTE(review): presumably these stand for open-ended range bounds; confirm against callers.
if (doubleValue == Double.POSITIVE_INFINITY) {
198+
LongPoint.encodeDimension(Long.MAX_VALUE, point, 0);
199+
} else if (doubleValue == Double.NEGATIVE_INFINITY) {
200+
LongPoint.encodeDimension(Long.MIN_VALUE, point, 0);
201+
} else {
202+
// scale() is defined outside this hunk; presumably it applies scalingFactor. TODO confirm.
LongPoint.encodeDimension(Math.round(scale(value)), point, 0);
203+
}
204+
return point;
205+
}
206+
191207
public double getScalingFactor() {
192208
return scalingFactor;
193209
}

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml

+139
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,9 @@ setup:
1414
date:
1515
type: date
1616
format: epoch_second
17+
scaled_field:
18+
type: scaled_float
19+
scaling_factor: 100
1720

1821
- do:
1922
cluster.health:
@@ -528,3 +531,139 @@ setup:
528531
- is_false: aggregations.unsigned_long_range.buckets.2.to
529532

530533
- match: { aggregations.unsigned_long_range.buckets.2.doc_count: 0 }
534+
535+
---
536+
"Double range profiler shows filter rewrite info":
537+
- skip:
538+
version: " - 2.99.99"
539+
reason: debug info for filter rewrite added in 3.0.0 (to be backported to 2.15.0)
540+
541+
- do:
542+
indices.create:
543+
index: test_profile
544+
body:
545+
settings:
546+
number_of_replicas: 0
547+
refresh_interval: -1
548+
mappings:
549+
properties:
550+
ip:
551+
type: ip
552+
double:
553+
type: double
554+
date:
555+
type: date
556+
format: epoch_second
557+
558+
- do:
559+
bulk:
560+
index: test_profile
561+
refresh: true
562+
body:
563+
- '{"index": {}}'
564+
- '{"double" : 42}'
565+
- '{"index": {}}'
566+
- '{"double" : 100}'
567+
- '{"index": {}}'
568+
- '{"double" : 50}'
569+
570+
- do:
571+
search:
572+
index: test_profile
573+
body:
574+
size: 0
575+
profile: true
576+
aggs:
577+
double_range:
578+
range:
579+
field: double
580+
ranges:
581+
- to: 50
582+
- from: 50
583+
to: 150
584+
- from: 150
585+
586+
- length: { aggregations.double_range.buckets: 3 }
587+
588+
- match: { aggregations.double_range.buckets.0.key: "*-50.0" }
589+
- is_false: aggregations.double_range.buckets.0.from
590+
- match: { aggregations.double_range.buckets.0.to: 50.0 }
591+
- match: { aggregations.double_range.buckets.0.doc_count: 1 }
592+
- match: { aggregations.double_range.buckets.1.key: "50.0-150.0" }
593+
- match: { aggregations.double_range.buckets.1.from: 50.0 }
594+
- match: { aggregations.double_range.buckets.1.to: 150.0 }
595+
- match: { aggregations.double_range.buckets.1.doc_count: 2 }
596+
- match: { aggregations.double_range.buckets.2.key: "150.0-*" }
597+
- match: { aggregations.double_range.buckets.2.from: 150.0 }
598+
- is_false: aggregations.double_range.buckets.2.to
599+
- match: { aggregations.double_range.buckets.2.doc_count: 0 }
600+
601+
- match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 }
602+
- match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
603+
- match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 }
604+
- match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }
605+
606+
---
607+
"Scaled Float Range Aggregation":
608+
- do:
609+
index:
610+
index: test
611+
id: 1
612+
body: { "scaled_field": 1 }
613+
614+
- do:
615+
index:
616+
index: test
617+
id: 2
618+
body: { "scaled_field": 1.53 }
619+
620+
- do:
621+
index:
622+
index: test
623+
id: 3
624+
body: { "scaled_field": -2.1 }
625+
626+
- do:
627+
index:
628+
index: test
629+
id: 4
630+
body: { "scaled_field": 1.53 }
631+
632+
- do:
633+
indices.refresh: { }
634+
635+
- do:
636+
search:
637+
index: test
638+
body:
639+
size: 0
640+
aggs:
641+
my_range:
642+
range:
643+
field: scaled_field
644+
ranges:
645+
- to: 0
646+
- from: 0
647+
to: 1
648+
- from: 1
649+
to: 1.5
650+
- from: 1.5
651+
652+
- length: { aggregations.my_range.buckets: 4 }
653+
654+
- match: { aggregations.my_range.buckets.0.key: "*-0.0" }
655+
- is_false: aggregations.my_range.buckets.0.from
656+
- match: { aggregations.my_range.buckets.0.to: 0.0 }
657+
- match: { aggregations.my_range.buckets.0.doc_count: 1 }
658+
- match: { aggregations.my_range.buckets.1.key: "0.0-1.0" }
659+
- match: { aggregations.my_range.buckets.1.from: 0.0 }
660+
- match: { aggregations.my_range.buckets.1.to: 1.0 }
661+
- match: { aggregations.my_range.buckets.1.doc_count: 0 }
662+
- match: { aggregations.my_range.buckets.2.key: "1.0-1.5" }
663+
- match: { aggregations.my_range.buckets.2.from: 1.0 }
664+
- match: { aggregations.my_range.buckets.2.to: 1.5 }
665+
- match: { aggregations.my_range.buckets.2.doc_count: 1 }
666+
- match: { aggregations.my_range.buckets.3.key: "1.5-*" }
667+
- match: { aggregations.my_range.buckets.3.from: 1.5 }
668+
- is_false: aggregations.my_range.buckets.3.to
669+
- match: { aggregations.my_range.buckets.3.doc_count: 2 }

server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java

+8-1
Original file line number | Diff line number | Diff line change
@@ -348,7 +348,7 @@ public DateFieldMapper build(BuilderContext context) {
348348
*
349349
* @opensearch.internal
350350
*/
351-
public static final class DateFieldType extends MappedFieldType {
351+
public static final class DateFieldType extends MappedFieldType implements NumericPointEncoder {
352352
protected final DateFormatter dateTimeFormatter;
353353
protected final DateMathParser dateMathParser;
354354
protected final Resolution resolution;
@@ -549,6 +549,13 @@ public static long parseToLong(
549549
return resolution.convert(dateParser.parse(BytesRefs.toString(value), now, roundUp, zone));
550550
}
551551

552+
/**
 * Encodes {@code value} into a {@code Long.BYTES}-long byte array using
 * {@code LongPoint.encodeDimension}.
 *
 * @param value the number to encode; only its {@code longValue()} is used
 * @return a newly allocated array with the encoded long written at offset 0
 */
@Override
553+
public byte[] encodePoint(Number value) {
554+
byte[] point = new byte[Long.BYTES];
555+
// Number.longValue() may round or truncate for non-integral Number subtypes.
// NOTE(review): presumably value is already in this field's resolution; confirm with callers.
LongPoint.encodeDimension(value.longValue(), point, 0);
556+
return point;
557+
}
558+
552559
@Override
553560
public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) {
554561
failIfNotIndexedAndNoDocValues();

0 commit comments

Comments
 (0)