Skip to content

Commit 5e7a711

Browse files
committed
Fixed keyword range ordinal seek status logic and added keyword in tests
Signed-off-by: expani <anijainc@amazon.com>
1 parent 973edf5 commit 5e7a711

File tree

7 files changed

+137
-57
lines changed

7 files changed

+137
-57
lines changed

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java

+15-5
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, StarTreeN
291291
}
292292

293293
@Override
294-
public void collectChildrenInRange(Long low, Long high, StarTreeNodeCollector collector) throws IOException {
294+
public void collectChildrenInRange(long low, long high, StarTreeNodeCollector collector) throws IOException {
295295
if (low <= high) {
296296
FixedLengthStarTreeNode lowStarTreeNode = binarySearchChild(low, true, null);
297297
if (lowStarTreeNode != null) {
@@ -307,21 +307,33 @@ public void collectChildrenInRange(Long low, Long high, StarTreeNodeCollector co
307307
}
308308
}
309309

310+
/**
311+
*
312+
* @param dimensionValue : The dimension to match.
313+
* @param matchNextHighest : If true then we try to return @dimensionValue or the next Highest. Else, we return @dimensionValue or the next Lowest.
314+
* @param lastMatchedNode : If not null, we begin the binary search from the node after this.
315+
* @return : Matched node or null.
316+
* @throws IOException
317+
*/
310318
private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, boolean matchNextHighest, StarTreeNode lastMatchedNode)
311319
throws IOException {
312320

313321
int low = firstChildId;
314322
int tempLow = low;
323+
int starNodeId, nullNodeId;
324+
starNodeId = nullNodeId = Integer.MIN_VALUE;
315325

316326
// if the current node is star node, increment the tempLow to reduce the search space
317327
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempLow), StarTreeNodeType.STAR) != null) {
328+
starNodeId = tempLow;
318329
tempLow++;
319330
}
320331

321332
int high = getInt(LAST_CHILD_ID_OFFSET);
322333
int tempHigh = high;
323334
// if the current node is null node, decrement the tempHigh to reduce the search space
324335
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempHigh), StarTreeNodeType.NULL) != null) {
336+
nullNodeId = tempHigh;
325337
tempHigh--;
326338
}
327339

@@ -346,8 +358,7 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, boolean m
346358
if (midDimensionValue < dimensionValue) { // Going to the right from mid to search next
347359
tempLow = mid + 1;
348360
// We are going out of bounds for this dimension on the right side.
349-
if ((tempLow > high)
350-
|| matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempLow), StarTreeNodeType.NULL) != null) {
361+
if (tempLow > high || tempLow == nullNodeId) {
351362
return matchNextHighest ? null : midNode;
352363
} else {
353364
FixedLengthStarTreeNode nodeGreaterThanMid = new FixedLengthStarTreeNode(in, tempLow);
@@ -358,8 +369,7 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, boolean m
358369
} else { // Going to the left from mid to search next
359370
tempHigh = mid - 1;
360371
// We are going out of bounds for this dimension on the left side.
361-
if ((tempHigh < low)
362-
|| matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempHigh), StarTreeNodeType.STAR) != null) {
372+
if (tempHigh < low || tempHigh == starNodeId) {
363373
return matchNextHighest ? midNode : null;
364374
} else {
365375
FixedLengthStarTreeNode nodeLessThanMid = new FixedLengthStarTreeNode(in, tempHigh);

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ public interface StarTreeNode {
112112

113113
StarTreeNode getChildForDimensionValue(Long dimensionValue, StarTreeNode lastMatchedChild) throws IOException;
114114

115-
void collectChildrenInRange(Long low, Long high, StarTreeNodeCollector collector) throws IOException;
115+
void collectChildrenInRange(long low, long high, StarTreeNodeCollector collector) throws IOException;
116116

117117
/**
118118
* Returns the child star node for a node in the star-tree.

server/src/main/java/org/opensearch/search/startree/filter/RangeMatchDimFilter.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,12 @@ public void initialiseForSegment(StarTreeValues starTreeValues, SearchContext se
5252
Optional<Long> lowOrdinalFound = dimensionFilterMapper.getMatchingOrdinal(dimensionName, low, starTreeValues, lowMatchType);
5353
if (lowOrdinalFound.isPresent()) {
5454
lowOrdinal = lowOrdinalFound.get();
55-
} else { // This is only valid for Non-numeric fields.
55+
} else {
56+
// This is only valid for Non-numeric fields.
57+
// High can't be found since nothing >= low exists.
5658
lowOrdinal = highOrdinal = Long.MAX_VALUE;
5759
skipRangeCollection = true;
58-
return; // High can't be found since nothing >= low exists.
60+
return;
5961
}
6062
}
6163
highOrdinal = Long.MAX_VALUE;
@@ -76,7 +78,6 @@ public void matchStarTreeNodes(StarTreeNode parentNode, StarTreeValues starTreeV
7678

7779
@Override
7880
public boolean matchDimValue(long ordinal, StarTreeValues starTreeValues) {
79-
// FIXME : Needs to be based on Match Type.
8081
return lowOrdinal <= ordinal && ordinal <= highOrdinal;
8182
}
8283

server/src/main/java/org/opensearch/search/startree/filter/provider/DimensionFilterMapper.java

+40-21
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.SHORT;
4444
import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.hasDecimalPart;
4545
import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.signum;
46+
import static org.opensearch.search.startree.filter.DimensionFilter.MatchType.GTE;
4647

4748
public interface DimensionFilterMapper {
4849
DimensionFilter getExactMatchFilter(MappedFieldType mappedFieldType, List<Object> rawValues);
@@ -345,20 +346,36 @@ public Optional<Long> getMatchingOrdinal(
345346
} else {
346347
TermsEnum termsEnum = sortedSetIterator.termsEnum();
347348
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil((BytesRef) value);
348-
if (matchType == DimensionFilter.MatchType.GT || matchType == DimensionFilter.MatchType.GTE) {
349-
// We reached the end and couldn't match anything, else we found a term which matches.
350-
return (seekStatus == TermsEnum.SeekStatus.END) ? Optional.empty() : Optional.of(termsEnum.ord());
351-
} else { // LT || LTE
352-
// If we found a term just greater, then return ordinal of the term just before it.
353-
if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) {
354-
long ordGreaterThanValue = termsEnum.ord();
355-
// Checking if we are in bounds for satisfying LT
356-
return ((ordGreaterThanValue - 1) < sortedSetIterator.getValueCount())
357-
? Optional.of(ordGreaterThanValue - 1)
358-
: Optional.empty();
359-
} else {
360-
return Optional.of(termsEnum.ord());
361-
}
349+
// We reached the end and couldn't match anything, else we found a term which matches.
350+
// LT || LTE
351+
// If we found a term just greater, then return ordinal of the term just before it.
352+
// Checking if we are in bounds for satisfying LT
353+
// Checking if we are in bounds for satisfying LT
354+
switch (matchType) {
355+
case GTE:
356+
return seekStatus == TermsEnum.SeekStatus.END ? Optional.empty() : Optional.of(termsEnum.ord());
357+
case GT:
358+
return switch (seekStatus) {
359+
case END -> Optional.empty();
360+
case FOUND -> ((termsEnum.ord() + 1) < sortedSetIterator.getValueCount())
361+
? Optional.of(termsEnum.ord() + 1)
362+
: Optional.empty();
363+
case NOT_FOUND -> Optional.of(termsEnum.ord());
364+
};
365+
case LTE:
366+
if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) {
367+
return ((termsEnum.ord() - 1) >= 0) ? Optional.of(termsEnum.ord() - 1) : Optional.empty();
368+
} else {
369+
return Optional.of(termsEnum.ord());
370+
}
371+
case LT:
372+
if (seekStatus == TermsEnum.SeekStatus.END) {
373+
return Optional.of(termsEnum.ord());
374+
} else {
375+
return ((termsEnum.ord() - 1) >= 0) ? Optional.of(termsEnum.ord() - 1) : Optional.empty();
376+
}
377+
default:
378+
throw new IllegalStateException("unexpected matchType " + matchType);
362379
}
363380
}
364381
} catch (IOException e) {
@@ -371,14 +388,16 @@ public Optional<Long> getMatchingOrdinal(
371388

372389
// TODO : Think around making TermBasedFT#indexedValueForSearch() accessor public for reuse here.
373390
private Object parseRawKeyword(String field, Object rawValue, KeywordFieldType keywordFieldType) {
374-
Object parsedValue;
375-
if (keywordFieldType.getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
376-
parsedValue = BytesRefs.toBytesRef(rawValue);
377-
} else {
378-
if (rawValue instanceof BytesRef) {
379-
rawValue = ((BytesRef) rawValue).utf8ToString();
391+
Object parsedValue = null;
392+
if (rawValue != null) {
393+
if (keywordFieldType.getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
394+
parsedValue = BytesRefs.toBytesRef(rawValue);
395+
} else {
396+
if (rawValue instanceof BytesRef) {
397+
rawValue = ((BytesRef) rawValue).utf8ToString();
398+
}
399+
parsedValue = keywordFieldType.getTextSearchInfo().getSearchAnalyzer().normalize(field, rawValue.toString());
380400
}
381-
parsedValue = keywordFieldType.getTextSearchInfo().getSearchAnalyzer().normalize(field, rawValue.toString());
382401
}
383402
return parsedValue;
384403
}

0 commit comments

Comments
 (0)