Skip to content

Commit 611ecc2

Browse files
author
Bhumika Saini
authored
[Remote Segment Store] Add Lucene major version to UploadedSegmentMetadata (opensearch-project#8088)
* Add Lucene version to UploadedSegmentMetadata --------- Signed-off-by: Bhumika Saini <sabhumik@amazon.com>
1 parent b669533 commit 611ecc2

File tree

5 files changed

+225
-33
lines changed

5 files changed

+225
-33
lines changed

server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java

+21
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,25 @@ public static long invertLong(String str) {
4848
}
4949
return Long.MAX_VALUE - num;
5050
}
51+
52+
/**
53+
* Extracts the segment name from the provided segment file name
54+
* @param filename Segment file name to parse
55+
* @return Name of the segment that the segment file belongs to
56+
*/
57+
public static String getSegmentName(String filename) {
58+
// Segment file names follow patterns like "_0.cfe" or "_0_1_Lucene90_0.dvm".
59+
// Here, the segment name is "_0", which is the set of characters
60+
// starting with "_" until the next "_" or first ".".
61+
int endIdx = filename.indexOf('_', 1);
62+
if (endIdx == -1) {
63+
endIdx = filename.indexOf('.');
64+
}
65+
66+
if (endIdx == -1) {
67+
throw new IllegalArgumentException("Unable to infer segment name for segment file " + filename);
68+
}
69+
70+
return filename.substring(0, endIdx);
71+
}
5172
}

server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java

+79-3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import org.apache.logging.log4j.message.ParameterizedMessage;
1515
import org.apache.lucene.codecs.CodecUtil;
1616
import org.apache.lucene.index.CorruptIndexException;
17+
import org.apache.lucene.index.SegmentCommitInfo;
18+
import org.apache.lucene.index.SegmentInfo;
1719
import org.apache.lucene.index.SegmentInfos;
1820
import org.apache.lucene.store.ByteBuffersDataOutput;
1921
import org.apache.lucene.store.ByteBuffersIndexOutput;
@@ -22,6 +24,7 @@
2224
import org.apache.lucene.store.IOContext;
2325
import org.apache.lucene.store.IndexInput;
2426
import org.apache.lucene.store.IndexOutput;
27+
import org.apache.lucene.util.Version;
2528
import org.opensearch.ExceptionsHelper;
2629
import org.opensearch.action.ActionListener;
2730
import org.opensearch.common.UUIDs;
@@ -217,6 +220,14 @@ public static class UploadedSegmentMetadata {
217220
private final String checksum;
218221
private final long length;
219222

223+
/**
224+
* The Lucene major version that wrote the original segment files.
225+
* As part of the Lucene version compatibility check, this version information stored in the metadata
226+
* will be used to skip downloading the segment files unnecessarily
227+
* if they were written by an incompatible Lucene version.
228+
*/
229+
private int writtenByMajor;
230+
220231
UploadedSegmentMetadata(String originalFilename, String uploadedFilename, String checksum, long length) {
221232
this.originalFilename = originalFilename;
222233
this.uploadedFilename = uploadedFilename;
@@ -226,7 +237,14 @@ public static class UploadedSegmentMetadata {
226237

227238
@Override
228239
public String toString() {
229-
return String.join(SEPARATOR, originalFilename, uploadedFilename, checksum, String.valueOf(length));
240+
return String.join(
241+
SEPARATOR,
242+
originalFilename,
243+
uploadedFilename,
244+
checksum,
245+
String.valueOf(length),
246+
String.valueOf(writtenByMajor)
247+
);
230248
}
231249

232250
public String getChecksum() {
@@ -239,12 +257,35 @@ public long getLength() {
239257

240258
public static UploadedSegmentMetadata fromString(String uploadedFilename) {
241259
String[] values = uploadedFilename.split(SEPARATOR);
242-
return new UploadedSegmentMetadata(values[0], values[1], values[2], Long.parseLong(values[3]));
260+
UploadedSegmentMetadata metadata = new UploadedSegmentMetadata(values[0], values[1], values[2], Long.parseLong(values[3]));
261+
if (values.length < 5) {
262+
logger.error("Lucene version is missing for UploadedSegmentMetadata: " + uploadedFilename);
263+
}
264+
265+
metadata.setWrittenByMajor(Integer.parseInt(values[4]));
266+
267+
return metadata;
243268
}
244269

245270
public String getOriginalFilename() {
246271
return originalFilename;
247272
}
273+
274+
public void setWrittenByMajor(int writtenByMajor) {
275+
if (writtenByMajor <= Version.LATEST.major && writtenByMajor >= Version.MIN_SUPPORTED_MAJOR) {
276+
this.writtenByMajor = writtenByMajor;
277+
} else {
278+
throw new IllegalArgumentException(
279+
"Lucene major version supplied ("
280+
+ writtenByMajor
281+
+ ") is incorrect. Should be between Version.LATEST ("
282+
+ Version.LATEST.major
283+
+ ") and Version.MIN_SUPPORTED_MAJOR ("
284+
+ Version.MIN_SUPPORTED_MAJOR
285+
+ ")."
286+
);
287+
}
288+
}
248289
}
249290

250291
/**
@@ -582,10 +623,13 @@ public void uploadMetadata(
582623
);
583624
try {
584625
try (IndexOutput indexOutput = storeDirectory.createOutput(metadataFilename, IOContext.DEFAULT)) {
626+
Map<String, Integer> segmentToLuceneVersion = getSegmentToLuceneVersion(segmentFiles, segmentInfosSnapshot);
585627
Map<String, String> uploadedSegments = new HashMap<>();
586628
for (String file : segmentFiles) {
587629
if (segmentsUploadedToRemoteStore.containsKey(file)) {
588-
uploadedSegments.put(file, segmentsUploadedToRemoteStore.get(file).toString());
630+
UploadedSegmentMetadata metadata = segmentsUploadedToRemoteStore.get(file);
631+
metadata.setWrittenByMajor(segmentToLuceneVersion.get(metadata.originalFilename));
632+
uploadedSegments.put(file, metadata.toString());
589633
} else {
590634
throw new NoSuchFileException(file);
591635
}
@@ -615,6 +659,38 @@ public void uploadMetadata(
615659
}
616660
}
617661

662+
/**
663+
* Parses the provided SegmentInfos to retrieve a mapping of the provided segment files to
664+
* the respective Lucene major version that wrote the segments
665+
* @param segmentFiles List of segment files for which the Lucene major version is needed
666+
* @param segmentInfosSnapshot SegmentInfos instance to parse
667+
* @return Map of the segment file to its Lucene major version
668+
*/
669+
private Map<String, Integer> getSegmentToLuceneVersion(Collection<String> segmentFiles, SegmentInfos segmentInfosSnapshot) {
670+
Map<String, Integer> segmentToLuceneVersion = new HashMap<>();
671+
for (SegmentCommitInfo segmentCommitInfo : segmentInfosSnapshot) {
672+
SegmentInfo info = segmentCommitInfo.info;
673+
Set<String> segFiles = info.files();
674+
for (String file : segFiles) {
675+
segmentToLuceneVersion.put(file, info.getVersion().major);
676+
}
677+
}
678+
679+
for (String file : segmentFiles) {
680+
if (segmentToLuceneVersion.containsKey(file) == false) {
681+
if (file.equals(segmentInfosSnapshot.getSegmentsFileName())) {
682+
segmentToLuceneVersion.put(file, segmentInfosSnapshot.getCommitLuceneVersion().major);
683+
} else {
684+
// Fallback to the Lucene major version of the respective segment's .si file
685+
String segmentInfoFileName = RemoteStoreUtils.getSegmentName(file) + ".si";
686+
segmentToLuceneVersion.put(file, segmentToLuceneVersion.get(segmentInfoFileName));
687+
}
688+
}
689+
}
690+
691+
return segmentToLuceneVersion;
692+
}
693+
618694
/**
619695
* Try to delete file from local store. Fails silently on failures
620696
* @param filename: name of the file to be deleted

server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java

+22
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,26 @@ public void testinvert() {
3838
assertEquals(num, RemoteStoreUtils.invertLong(RemoteStoreUtils.invertLong(num)));
3939
}
4040
}
41+
42+
public void testGetSegmentNameForCfeFile() {
43+
assertEquals("_foo", RemoteStoreUtils.getSegmentName("_foo.cfe"));
44+
}
45+
46+
public void testGetSegmentNameForDvmFile() {
47+
assertEquals("_bar", RemoteStoreUtils.getSegmentName("_bar_1_Lucene90_0.dvm"));
48+
}
49+
50+
public void testGetSegmentNameWeirdSegmentNameOnlyUnderscore() {
51+
// Validate behaviour when segment name contains delimiters only
52+
assertEquals("_", RemoteStoreUtils.getSegmentName("_.dvm"));
53+
}
54+
55+
public void testGetSegmentNameUnderscoreDelimiterOverrides() {
56+
// Validate behaviour when segment name contains delimiters only
57+
assertEquals("_", RemoteStoreUtils.getSegmentName("___.dvm"));
58+
}
59+
60+
public void testGetSegmentNameException() {
61+
assertThrows(IllegalArgumentException.class, () -> RemoteStoreUtils.getSegmentName("dvd"));
62+
}
4163
}

0 commit comments

Comments
 (0)