|
 package org.opensearch.index.mapper;

+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.opensearch.Version;
 import org.opensearch.cluster.metadata.IndexMetadata;

 import org.joda.time.DateTimeZone;

 import java.io.IOException;
+import java.time.Instant;
 import java.time.ZoneOffset;
+import java.util.Arrays;
 import java.util.Collections;
+import java.util.List;
+import java.util.Locale;

 import static org.hamcrest.CoreMatchers.is;
 import static org.apache.lucene.document.LongPoint.pack;
@@ -491,4 +507,187 @@ public void testParseSourceValueNanos() throws IOException {
         MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate);
         assertEquals(Collections.singletonList(nullValueDate), fetchSourceValue(nullValueMapper, null));
     }
+
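+    // Indexes a mix of null dates, ordinary dates, and the extreme values representable in epoch
+    // milliseconds, then verifies that range and term queries resolve them without overflow.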
+    public void testDateResolutionForOverflow() throws IOException {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+
+        DateFieldType ft = new DateFieldType(
+            "test_date",
+            true,
+            true,
+            true,
+            DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
+            Resolution.MILLISECONDS,
+            null,
+            Collections.emptyMap()
+        );
+
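+        // Null entries stand in for documents without a date; the list also includes the largest
+        // and smallest dates representable in epoch milliseconds (Long.MAX_VALUE / Long.MIN_VALUE).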
+        List<String> dates = Arrays.asList(
+            null,
+            "2020-01-01T00:00:00Z",
+            null,
+            "2021-01-01T00:00:00Z",
+            "+292278994-08-17T07:12:55.807Z",
+            null,
+            "-292275055-05-16T16:47:04.192Z"
+        );
+
+        int numNullDates = 0;
+        long minDateValue = Long.MAX_VALUE;
+        long maxDateValue = Long.MIN_VALUE;
+
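+        // Index each non-null date as a point, doc values, and a stored field, tracking the
+        // min/max indexed timestamps; null dates only get an id.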
+        for (int i = 0; i < dates.size(); i++) {
+            ParseContext.Document doc = new ParseContext.Document();
+            String dateStr = dates.get(i);
+
+            if (dateStr != null) {
+                long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
+                doc.add(new LongPoint(ft.name(), timestamp));
+                doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
+                doc.add(new StoredField(ft.name(), timestamp));
+                doc.add(new StoredField("id", i));
+                minDateValue = Math.min(minDateValue, timestamp);
+                maxDateValue = Math.max(maxDateValue, timestamp);
+            } else {
+                numNullDates++;
+                doc.add(new StoredField("id", i));
+            }
+            w.addDocument(doc);
+        }
+
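+        // Open a reader over the index and build a minimal QueryShardContext so the field type
+        // can construct queries.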
+        DirectoryReader reader = DirectoryReader.open(w);
+        IndexSearcher searcher = new IndexSearcher(reader);
+
+        Settings indexSettings = Settings.builder()
+            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
+            .build();
+        QueryShardContext context = new QueryShardContext(
+            0,
+            new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings),
+            BigArrays.NON_RECYCLING_INSTANCE,
+            null,
+            null,
+            null,
+            null,
+            null,
+            xContentRegistry(),
+            writableRegistry(),
+            null,
+            null,
+            () -> nowInMillis,
+            null,
+            null,
+            () -> true,
+            null
+        );
+
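+        // Range query spanning from the smallest to the largest indexed date; every document
+        // that has a date should match.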
+        Query rangeQuery = ft.rangeQuery(
+            "-292275055-05-16T16:47:04.192Z",
+            "+292278994-08-17T07:12:55.807Z",
+            true,
+            true,
+            null,
+            null,
+            null,
+            context
+        );
+
+        TopDocs topDocs = searcher.search(rangeQuery, dates.size());
+        assertEquals("Number of non-null date documents", dates.size() - numNullDates, topDocs.totalHits.value);
+
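+        // Every hit's stored date must fall within the min/max timestamps recorded at index time.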
+        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+            org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
+            IndexableField dateField = doc.getField(ft.name());
+            if (dateField != null) {
+                long dateValue = dateField.numericValue().longValue();
+                assertTrue(
+                    "Date value " + dateValue + " should be within valid range",
+                    dateValue >= minDateValue && dateValue <= maxDateValue
+                );
+            }
+        }
+
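+        // A field type configured with a null_value of 2020-01-01; a term query for that date
+        // should match exactly the one document indexed with it.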
+        DateFieldType ftWithNullValue = new DateFieldType(
+            "test_date",
+            true,
+            true,
+            true,
+            DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
+            Resolution.MILLISECONDS,
+            "2020-01-01T00:00:00Z",
+            Collections.emptyMap()
+        );
+
+        Query nullValueQuery = ftWithNullValue.termQuery("2020-01-01T00:00:00Z", context);
+        topDocs = searcher.search(nullValueQuery, dates.size());
+        assertEquals("Documents matching the 2020-01-01 date", 1, topDocs.totalHits.value);
+
+        IOUtils.close(reader, w, dir);
+    }
+
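+    // Builds an index where most documents lack the date field and a smaller set carries March 2022
+    // dates, then repeatedly runs a sorted boolean query and checks the hit count and stored dates.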
+    public void testDateFieldTypeWithNulls() throws IOException {
+        DateFieldType ft = new DateFieldType(
+            "domainAttributes.dueDate",
+            true,
+            true,
+            true,
+            DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||date_optional_time"),
+            Resolution.MILLISECONDS,
+            null,
+            Collections.emptyMap()
+        );
+
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+
+        int nullDocs = 3500;
+        int datedDocs = 50;
+
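+        // Documents that carry the firmId keyword but have no due date.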
+        for (int i = 0; i < nullDocs; i++) {
+            ParseContext.Document doc = new ParseContext.Document();
+            doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
+            w.addDocument(doc);
+        }
+
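+        // Documents that also carry a due date somewhere in March 2022.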
+        for (int i = 1; i <= datedDocs; i++) {
+            ParseContext.Document doc = new ParseContext.Document();
+            String dateStr = String.format(Locale.ROOT, "2022-03-%02dT15:40:58.324", (i % 30) + 1);
+            long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
+            doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
+            doc.add(new LongPoint(ft.name(), timestamp));
+            doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
+            doc.add(new StoredField(ft.name(), timestamp));
+            w.addDocument(doc);
+        }
+
+        DirectoryReader reader = DirectoryReader.open(w);
+        IndexSearcher searcher = new IndexSearcher(reader);
+
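+        // Boolean query on the firmId keyword (matches every document), sorted in index order.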
+        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+        queryBuilder.add(new TermQuery(new Term("domainAttributes.firmId", "12345678910111213")), BooleanClause.Occur.MUST);
+
+        Sort sort = new Sort(new SortField(ft.name(), SortField.Type.DOC, false));
+
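+        // Search repeatedly: every document must be returned, and any stored date must fall
+        // within March 2022.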
+        for (int i = 0; i < 100; i++) {
+            TopDocs topDocs = searcher.search(queryBuilder.build(), nullDocs + datedDocs, sort);
+            assertEquals("Total hits should match total documents", nullDocs + datedDocs, topDocs.totalHits.value);
+            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+                org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
+                IndexableField dateField = doc.getField(ft.name());
+                if (dateField != null) {
+                    long dateValue = dateField.numericValue().longValue();
+                    Instant dateInstant = Instant.ofEpochMilli(dateValue);
+                    assertTrue(
+                        "Date should be in March 2022",
+                        dateInstant.isAfter(Instant.parse("2022-03-01T00:00:00Z"))
+                            && dateInstant.isBefore(Instant.parse("2022-04-01T00:00:00Z"))
+                    );
+                }
+            }
+        }
+        IOUtils.close(reader, w, dir);
+    }
 }
|