Skip to content

Commit 0e1adb4

Browse files
author
Bhumika Saini
committed
Use .si file's Lucene version for files without version info
Signed-off-by: Bhumika Saini <sabhumik@amazon.com>
1 parent e1ce4e6 commit 0e1adb4

File tree

3 files changed

+50
-56
lines changed

3 files changed

+50
-56
lines changed

server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java

+15-19
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@
88

99
package org.opensearch.index.remote;
1010

11-
import org.apache.lucene.index.CorruptIndexException;
12-
1311
import java.util.Arrays;
14-
import java.util.regex.Matcher;
15-
import java.util.regex.Pattern;
1612

1713
/**
1814
* Utils for remote store
@@ -54,23 +50,23 @@ public static long invertLong(String str) {
5450
}
5551

5652
/**
57-
* Extracts the Lucene major version from the provided DocValuesUpdates file name
58-
* @param filename DocValuesUpdates file name to parse
59-
* @return Lucene major version that wrote the DocValuesUpdates file
60-
* @throws CorruptIndexException If the Lucene major version cannot be inferred
53+
* Extracts the segment name from the provided segment file name
54+
* @param filename Segment file name to parse
55+
* @return Name of the segment that the segment file belongs to
6156
*/
62-
public static int getLuceneVersionForDocValuesUpdates(String filename) throws CorruptIndexException {
63-
// TODO: The following regex could work incorrectly if both major and minor versions are double-digits.
64-
// This is because the major and minor versions do not have a separator in the filename currently
65-
// (Lucence<major><minor>).
66-
// We may need to revisit this if the filename pattern is updated in future Lucene versions.
67-
Pattern docValuesUpdatesFileNamePattern = Pattern.compile("_\\d+_\\d+_Lucene(\\d+)\\d+_\\d+");
57+
public static String getSegmentName(String filename) {
58+
// Segment file names follow patterns like "_0.cfe" or "_0_1_Lucene90_0.dvm".
59+
// Here, the segment name is "_0", which is the set of characters
60+
// starting with "_" until the next "_" or first ".".
61+
int endIdx = filename.indexOf('_', 1);
62+
if (endIdx == -1) {
63+
endIdx = filename.indexOf('.');
64+
}
6865

69-
Matcher matcher = docValuesUpdatesFileNamePattern.matcher(filename);
70-
if (matcher.find()) {
71-
return Integer.parseInt(matcher.group(1));
72-
} else {
73-
throw new CorruptIndexException("Unable to infer Lucene version for segment file " + filename, filename);
66+
if (endIdx == -1) {
67+
throw new IllegalArgumentException("Unable to infer segment name for segment file " + filename);
7468
}
69+
70+
return filename.substring(0, endIdx);
7571
}
7672
}

server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java

+17-32
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import org.apache.lucene.index.CorruptIndexException;
1717
import org.apache.lucene.index.SegmentCommitInfo;
1818
import org.apache.lucene.index.SegmentInfo;
19-
import org.apache.lucene.index.CorruptIndexException;
2019
import org.apache.lucene.index.SegmentInfos;
2120
import org.apache.lucene.store.ByteBuffersDataOutput;
2221
import org.apache.lucene.store.ByteBuffersIndexOutput;
@@ -624,22 +623,12 @@ public void uploadMetadata(
624623
);
625624
try {
626625
try (IndexOutput indexOutput = storeDirectory.createOutput(metadataFilename, IOContext.DEFAULT)) {
627-
Map<String, Integer> segmentToLuceneVersion = getSegmentToLuceneVersion(segmentInfosSnapshot);
626+
Map<String, Integer> segmentToLuceneVersion = getSegmentToLuceneVersion(segmentFiles, segmentInfosSnapshot);
628627
Map<String, String> uploadedSegments = new HashMap<>();
629628
for (String file : segmentFiles) {
630629
if (segmentsUploadedToRemoteStore.containsKey(file)) {
631630
UploadedSegmentMetadata metadata = segmentsUploadedToRemoteStore.get(file);
632-
if (segmentToLuceneVersion.containsKey(metadata.originalFilename)) {
633-
metadata.setWrittenByMajor(segmentToLuceneVersion.get(metadata.originalFilename));
634-
} else if (metadata.originalFilename.equals(segmentInfosSnapshot.getSegmentsFileName())) {
635-
metadata.setWrittenByMajor(segmentInfosSnapshot.getCommitLuceneVersion().major);
636-
} else {
637-
throw new CorruptIndexException(
638-
"Lucene version is missing for segment file " + metadata.originalFilename,
639-
metadata.originalFilename
640-
);
641-
}
642-
631+
metadata.setWrittenByMajor(segmentToLuceneVersion.get(metadata.originalFilename));
643632
uploadedSegments.put(file, metadata.toString());
644633
} else {
645634
throw new NoSuchFileException(file);
@@ -671,34 +660,30 @@ public void uploadMetadata(
671660
}
672661

673662
/**
674-
* Parses the provided SegmenttInfos to retrieve a mapping of segment files to the respective Lucene major version that wrote the segments
675-
* @param segmentInfosSnapshot SegmenttInfos instance to parse
676-
* @return Map of segment file name to the Lucene major version that wrote it
677-
* @throws CorruptIndexException If the Lucene major version cannot be parsed for a segment file
663+
* Parses the provided SegmentInfos to retrieve a mapping of the provided segment files to
664+
* the respective Lucene major version that wrote the segments
665+
* @param segmentFiles List of segment files for which the Lucene major version is needed
666+
* @param segmentInfosSnapshot SegmentInfos instance to parse
667+
* @return Map of the segment file to its Lucene major version
678668
*/
679-
private Map<String, Integer> getSegmentToLuceneVersion(SegmentInfos segmentInfosSnapshot) throws CorruptIndexException {
669+
private Map<String, Integer> getSegmentToLuceneVersion(Collection<String> segmentFiles, SegmentInfos segmentInfosSnapshot) {
680670
Map<String, Integer> segmentToLuceneVersion = new HashMap<>();
681671
for (SegmentCommitInfo segmentCommitInfo : segmentInfosSnapshot) {
682672
SegmentInfo info = segmentCommitInfo.info;
683673
Set<String> segFiles = info.files();
684674
for (String file : segFiles) {
685675
segmentToLuceneVersion.put(file, info.getVersion().major);
686676
}
677+
}
687678

688-
int docValuesUpdatesLuceneMajorVersion = -1;
689-
Map<Integer, Set<String>> docValuesUpdatesFiles = segmentCommitInfo.getDocValuesUpdatesFiles();
690-
for (int key : docValuesUpdatesFiles.keySet()) {
691-
for (String file : docValuesUpdatesFiles.get(key)) {
692-
if (docValuesUpdatesLuceneMajorVersion == -1) {
693-
docValuesUpdatesLuceneMajorVersion = RemoteStoreUtils.getLuceneVersionForDocValuesUpdates(file);
694-
}
695-
696-
segmentToLuceneVersion.put(file, docValuesUpdatesLuceneMajorVersion);
697-
}
698-
699-
Set<String> fieldInfosFiles = segmentCommitInfo.getFieldInfosFiles();
700-
for (String file : fieldInfosFiles) {
701-
segmentToLuceneVersion.put(file, docValuesUpdatesLuceneMajorVersion);
679+
for (String file : segmentFiles) {
680+
if (segmentToLuceneVersion.containsKey(file) == false) {
681+
if (file.equals(segmentInfosSnapshot.getSegmentsFileName())) {
682+
segmentToLuceneVersion.put(file, segmentInfosSnapshot.getCommitLuceneVersion().major);
683+
} else {
684+
// Fallback to the Lucene major version of the respective segment's .si file
685+
String segmentInfoFileName = RemoteStoreUtils.getSegmentName(file) + ".si";
686+
segmentToLuceneVersion.put(file, segmentToLuceneVersion.get(segmentInfoFileName));
702687
}
703688
}
704689
}

server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java

+18-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
package org.opensearch.index.remote;
1010

11-
import org.apache.lucene.index.CorruptIndexException;
1211
import org.opensearch.test.OpenSearchTestCase;
1312

1413
public class RemoteStoreUtilsTests extends OpenSearchTestCase {
@@ -40,11 +39,25 @@ public void testinvert() {
4039
}
4140
}
4241

43-
public void testGetLuceneVersionForDocValuesUpdates() throws CorruptIndexException {
44-
assertEquals(9, RemoteStoreUtils.getLuceneVersionForDocValuesUpdates("_0_1_Lucene90_0.dvm"));
42+
public void testGetSegmentNameForCfeFile() {
43+
assertEquals("_foo", RemoteStoreUtils.getSegmentName("_foo.cfe"));
4544
}
4645

47-
public void testGetLuceneVersionForDocValuesUpdatesException() {
48-
assertThrows(CorruptIndexException.class, () -> RemoteStoreUtils.getLuceneVersionForDocValuesUpdates("_0_1_Asserting_0.dvm"));
46+
public void testGetSegmentNameForDvmFile() {
47+
assertEquals("_bar", RemoteStoreUtils.getSegmentName("_bar_1_Lucene90_0.dvm"));
48+
}
49+
50+
public void testGetSegmentNameWeirdSegmentNameOnlyUnderscore() {
51+
// Validate behaviour when segment name contains delimiters only
52+
assertEquals("_", RemoteStoreUtils.getSegmentName("_.dvm"));
53+
}
54+
55+
public void testGetSegmentNameUnderscoreDelimiterOverrides() {
56+
// Validate behaviour when segment name contains delimiters only
57+
assertEquals("_", RemoteStoreUtils.getSegmentName("___.dvm"));
58+
}
59+
60+
public void testGetSegmentNameException() {
61+
assertThrows(IllegalArgumentException.class, () -> RemoteStoreUtils.getSegmentName("dvd"));
4962
}
5063
}

0 commit comments

Comments
 (0)