Skip to content

Commit 5a8a822

Browse files
prudhvigodithi authored and mingshl committed
Overflow prevention (opensearch-project#16812)
Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
1 parent b63d03a commit 5a8a822

File tree

6 files changed

+243
-4
lines changed

6 files changed

+243
-4
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2222
- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
2323
- Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/))
2424
- Introduce Template query ([#16818](https://github.com/opensearch-project/OpenSearch/pull/16818))
25+
- Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)).
2526

2627
### Dependencies
2728
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))

server/src/main/java/org/opensearch/common/time/DateUtils.java

+24
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,30 @@ public static Instant clampToNanosRange(Instant instant) {
272272
return instant;
273273
}
274274

275+
static final Instant INSTANT_LONG_MIN_VALUE = Instant.ofEpochMilli(Long.MIN_VALUE);
276+
static final Instant INSTANT_LONG_MAX_VALUE = Instant.ofEpochMilli(Long.MAX_VALUE);
277+
278+
/**
279+
* Clamps the given {@link Instant} to the valid epoch millisecond range.
280+
*
281+
* - If the input is before {@code Long.MIN_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MIN_VALUE)}.
282+
* - If the input is after {@code Long.MAX_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MAX_VALUE)}.
283+
* - Otherwise, it returns the input as-is.
284+
*
285+
* @param instant the {@link Instant} to clamp
286+
* @return the clamped {@link Instant}
287+
* @throws NullPointerException if the input is {@code null}
288+
*/
289+
public static Instant clampToMillisRange(Instant instant) {
290+
if (instant.isBefore(INSTANT_LONG_MIN_VALUE)) {
291+
return INSTANT_LONG_MIN_VALUE;
292+
}
293+
if (instant.isAfter(INSTANT_LONG_MAX_VALUE)) {
294+
return INSTANT_LONG_MAX_VALUE;
295+
}
296+
return instant;
297+
}
298+
275299
/**
276300
* convert a long value to a java time instant
277301
* the long value resembles the nanoseconds since the epoch

server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ public enum Resolution {
122122
MILLISECONDS(CONTENT_TYPE, NumericType.DATE) {
123123
@Override
124124
public long convert(Instant instant) {
125-
return instant.toEpochMilli();
125+
return clampToValidRange(instant).toEpochMilli();
126126
}
127127

128128
@Override
@@ -132,7 +132,7 @@ public Instant toInstant(long value) {
132132

133133
@Override
134134
public Instant clampToValidRange(Instant instant) {
135-
return instant;
135+
return DateUtils.clampToMillisRange(instant);
136136
}
137137

138138
@Override

server/src/test/java/org/opensearch/common/time/DateUtilsTests.java

+17
Original file line numberDiff line numberDiff line change
@@ -260,4 +260,21 @@ public void testRoundYear() {
260260
long startOf1996 = Year.of(1996).atDay(1).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
261261
assertThat(DateUtils.roundYear(endOf1996), is(startOf1996));
262262
}
263+
264+
public void testClampToMillisRange() {
265+
Instant normalInstant = Instant.now();
266+
assertEquals(normalInstant, DateUtils.clampToMillisRange(normalInstant));
267+
268+
Instant beforeMinInstant = DateUtils.INSTANT_LONG_MIN_VALUE.minusMillis(1);
269+
assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(beforeMinInstant));
270+
271+
Instant afterMaxInstant = DateUtils.INSTANT_LONG_MAX_VALUE.plusMillis(1);
272+
assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(afterMaxInstant));
273+
274+
assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MIN_VALUE));
275+
276+
assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MAX_VALUE));
277+
278+
assertThrows(NullPointerException.class, () -> DateUtils.clampToMillisRange(null));
279+
}
263280
}

server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java

-2
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,6 @@ public void testIgnoreMalformedLegacy() throws IOException {
156156
"failed to parse date field [2016-03-99] with format [strict_date_optional_time||epoch_millis]"
157157
);
158158
testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
159-
testIgnoreMalformedForValue("-522000000", "long overflow");
160159
}
161160

162161
public void testIgnoreMalformed() throws IOException {
@@ -170,7 +169,6 @@ public void testIgnoreMalformed() throws IOException {
170169
"failed to parse date field [2016-03-99] with format [strict_date_time_no_millis||strict_date_optional_time||epoch_millis]"
171170
);
172171
testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
173-
testIgnoreMalformedForValue("-522000000", "long overflow");
174172
}
175173

176174
private void testIgnoreMalformedForValue(String value, String expectedCause) throws IOException {

server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java

+199
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,32 @@
3131

3232
package org.opensearch.index.mapper;
3333

34+
import org.apache.lucene.document.Field;
3435
import org.apache.lucene.document.LongPoint;
3536
import org.apache.lucene.document.NumericDocValuesField;
3637
import org.apache.lucene.document.SortedNumericDocValuesField;
38+
import org.apache.lucene.document.StoredField;
39+
import org.apache.lucene.document.StringField;
3740
import org.apache.lucene.index.DirectoryReader;
3841
import org.apache.lucene.index.IndexReader;
3942
import org.apache.lucene.index.IndexWriter;
4043
import org.apache.lucene.index.IndexWriterConfig;
44+
import org.apache.lucene.index.IndexableField;
4145
import org.apache.lucene.index.MultiReader;
4246
import org.apache.lucene.index.SortedNumericDocValues;
47+
import org.apache.lucene.index.Term;
48+
import org.apache.lucene.search.BooleanClause;
49+
import org.apache.lucene.search.BooleanQuery;
4350
import org.apache.lucene.search.DocIdSetIterator;
4451
import org.apache.lucene.search.IndexOrDocValuesQuery;
4552
import org.apache.lucene.search.IndexSearcher;
4653
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
4754
import org.apache.lucene.search.Query;
55+
import org.apache.lucene.search.ScoreDoc;
56+
import org.apache.lucene.search.Sort;
57+
import org.apache.lucene.search.SortField;
58+
import org.apache.lucene.search.TermQuery;
59+
import org.apache.lucene.search.TopDocs;
4860
import org.apache.lucene.store.Directory;
4961
import org.opensearch.Version;
5062
import org.opensearch.cluster.metadata.IndexMetadata;
@@ -72,8 +84,12 @@
7284
import org.joda.time.DateTimeZone;
7385

7486
import java.io.IOException;
87+
import java.time.Instant;
7588
import java.time.ZoneOffset;
89+
import java.util.Arrays;
7690
import java.util.Collections;
91+
import java.util.List;
92+
import java.util.Locale;
7793

7894
import static org.hamcrest.CoreMatchers.is;
7995
import static org.apache.lucene.document.LongPoint.pack;
@@ -491,4 +507,187 @@ public void testParseSourceValueNanos() throws IOException {
491507
MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate);
492508
assertEquals(Collections.singletonList(nullValueDate), fetchSourceValue(nullValueMapper, null));
493509
}
510+
511+
public void testDateResolutionForOverflow() throws IOException {
512+
Directory dir = newDirectory();
513+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
514+
515+
DateFieldType ft = new DateFieldType(
516+
"test_date",
517+
true,
518+
true,
519+
true,
520+
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
521+
Resolution.MILLISECONDS,
522+
null,
523+
Collections.emptyMap()
524+
);
525+
526+
List<String> dates = Arrays.asList(
527+
null,
528+
"2020-01-01T00:00:00Z",
529+
null,
530+
"2021-01-01T00:00:00Z",
531+
"+292278994-08-17T07:12:55.807Z",
532+
null,
533+
"-292275055-05-16T16:47:04.192Z"
534+
);
535+
536+
int numNullDates = 0;
537+
long minDateValue = Long.MAX_VALUE;
538+
long maxDateValue = Long.MIN_VALUE;
539+
540+
for (int i = 0; i < dates.size(); i++) {
541+
ParseContext.Document doc = new ParseContext.Document();
542+
String dateStr = dates.get(i);
543+
544+
if (dateStr != null) {
545+
long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
546+
doc.add(new LongPoint(ft.name(), timestamp));
547+
doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
548+
doc.add(new StoredField(ft.name(), timestamp));
549+
doc.add(new StoredField("id", i));
550+
minDateValue = Math.min(minDateValue, timestamp);
551+
maxDateValue = Math.max(maxDateValue, timestamp);
552+
} else {
553+
numNullDates++;
554+
doc.add(new StoredField("id", i));
555+
}
556+
w.addDocument(doc);
557+
}
558+
559+
DirectoryReader reader = DirectoryReader.open(w);
560+
IndexSearcher searcher = new IndexSearcher(reader);
561+
562+
Settings indexSettings = Settings.builder()
563+
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
564+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
565+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
566+
.build();
567+
QueryShardContext context = new QueryShardContext(
568+
0,
569+
new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings),
570+
BigArrays.NON_RECYCLING_INSTANCE,
571+
null,
572+
null,
573+
null,
574+
null,
575+
null,
576+
xContentRegistry(),
577+
writableRegistry(),
578+
null,
579+
null,
580+
() -> nowInMillis,
581+
null,
582+
null,
583+
() -> true,
584+
null
585+
);
586+
587+
Query rangeQuery = ft.rangeQuery(
588+
"-292275055-05-16T16:47:04.192Z",
589+
"+292278994-08-17T07:12:55.807Z",
590+
true,
591+
true,
592+
null,
593+
null,
594+
null,
595+
context
596+
);
597+
598+
TopDocs topDocs = searcher.search(rangeQuery, dates.size());
599+
assertEquals("Number of non-null date documents", dates.size() - numNullDates, topDocs.totalHits.value);
600+
601+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
602+
org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
603+
IndexableField dateField = doc.getField(ft.name());
604+
if (dateField != null) {
605+
long dateValue = dateField.numericValue().longValue();
606+
assertTrue(
607+
"Date value " + dateValue + " should be within valid range",
608+
dateValue >= minDateValue && dateValue <= maxDateValue
609+
);
610+
}
611+
}
612+
613+
DateFieldType ftWithNullValue = new DateFieldType(
614+
"test_date",
615+
true,
616+
true,
617+
true,
618+
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
619+
Resolution.MILLISECONDS,
620+
"2020-01-01T00:00:00Z",
621+
Collections.emptyMap()
622+
);
623+
624+
Query nullValueQuery = ftWithNullValue.termQuery("2020-01-01T00:00:00Z", context);
625+
topDocs = searcher.search(nullValueQuery, dates.size());
626+
assertEquals("Documents matching the 2020-01-01 date", 1, topDocs.totalHits.value);
627+
628+
IOUtils.close(reader, w, dir);
629+
}
630+
631+
public void testDateFieldTypeWithNulls() throws IOException {
632+
DateFieldType ft = new DateFieldType(
633+
"domainAttributes.dueDate",
634+
true,
635+
true,
636+
true,
637+
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||date_optional_time"),
638+
Resolution.MILLISECONDS,
639+
null,
640+
Collections.emptyMap()
641+
);
642+
643+
Directory dir = newDirectory();
644+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
645+
646+
int nullDocs = 3500;
647+
int datedDocs = 50;
648+
649+
for (int i = 0; i < nullDocs; i++) {
650+
ParseContext.Document doc = new ParseContext.Document();
651+
doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
652+
w.addDocument(doc);
653+
}
654+
655+
for (int i = 1; i <= datedDocs; i++) {
656+
ParseContext.Document doc = new ParseContext.Document();
657+
String dateStr = String.format(Locale.ROOT, "2022-03-%02dT15:40:58.324", (i % 30) + 1);
658+
long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
659+
doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
660+
doc.add(new LongPoint(ft.name(), timestamp));
661+
doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
662+
doc.add(new StoredField(ft.name(), timestamp));
663+
w.addDocument(doc);
664+
}
665+
666+
DirectoryReader reader = DirectoryReader.open(w);
667+
IndexSearcher searcher = new IndexSearcher(reader);
668+
669+
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
670+
queryBuilder.add(new TermQuery(new Term("domainAttributes.firmId", "12345678910111213")), BooleanClause.Occur.MUST);
671+
672+
Sort sort = new Sort(new SortField(ft.name(), SortField.Type.DOC, false));
673+
674+
for (int i = 0; i < 100; i++) {
675+
TopDocs topDocs = searcher.search(queryBuilder.build(), nullDocs + datedDocs, sort);
676+
assertEquals("Total hits should match total documents", nullDocs + datedDocs, topDocs.totalHits.value);
677+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
678+
org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
679+
IndexableField dateField = doc.getField(ft.name());
680+
if (dateField != null) {
681+
long dateValue = dateField.numericValue().longValue();
682+
Instant dateInstant = Instant.ofEpochMilli(dateValue);
683+
assertTrue(
684+
"Date should be in March 2022",
685+
dateInstant.isAfter(Instant.parse("2022-03-01T00:00:00Z"))
686+
&& dateInstant.isBefore(Instant.parse("2022-04-01T00:00:00Z"))
687+
);
688+
}
689+
}
690+
}
691+
IOUtils.close(reader, w, dir);
692+
}
494693
}

0 commit comments

Comments
 (0)