Skip to content

Commit 3d1d5e7

Browse files
authored
Add faster scaling composite hash value encoding for remote path (#13155)
Signed-off-by: Ashish Singh <ssashish@amazon.com>
1 parent 3c8eafd commit 3d1d5e7

File tree

13 files changed

+732
-69
lines changed

13 files changed

+732
-69
lines changed

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java

+8-8
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG;
6060
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
6161
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
62-
import static org.opensearch.indices.IndicesService.CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING;
62+
import static org.opensearch.indices.IndicesService.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING;
6363
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
6464
import static org.hamcrest.Matchers.equalTo;
6565
import static org.hamcrest.Matchers.greaterThan;
@@ -229,7 +229,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
229229
client(clusterManagerNode).admin()
230230
.cluster()
231231
.prepareUpdateSettings()
232-
.setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING.getKey(), PathType.FIXED))
232+
.setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.FIXED))
233233
.get();
234234
createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true));
235235
Client client = client();
@@ -260,7 +260,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
260260
client(clusterManagerNode).admin()
261261
.cluster()
262262
.prepareUpdateSettings()
263-
.setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX))
263+
.setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX))
264264
.get();
265265

266266
restoreSnapshotResponse = client.admin()
@@ -272,13 +272,13 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
272272
.get();
273273
assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status());
274274
ensureGreen(restoredIndexName1version2);
275-
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
275+
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE_1);
276276

277-
// Create index with cluster setting cluster.remote_store.index.path.prefix.type as hashed_prefix.
277+
// Create index with cluster setting cluster.remote_store.index.path.type as hashed_prefix.
278278
indexSettings = getIndexSettings(1, 0).build();
279279
createIndex(indexName2, indexSettings);
280280
ensureGreen(indexName2);
281-
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
281+
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE_1);
282282

283283
// Validating that custom data has not changed for indexes which were created before the cluster setting got updated
284284
validatePathType(indexName1, PathType.FIXED);
@@ -294,7 +294,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
294294
client(clusterManagerNode).admin()
295295
.cluster()
296296
.prepareUpdateSettings()
297-
.setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING.getKey(), PathType.FIXED))
297+
.setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.FIXED))
298298
.get();
299299

300300
// Close index 2
@@ -309,7 +309,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
309309
ensureGreen(indexName2);
310310

311311
// Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated
312-
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
312+
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE_1);
313313
}
314314

315315
private void validatePathType(String index, PathType pathType) {

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,8 @@ public void apply(Settings value, Settings current, Settings previous) {
713713
RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING,
714714
IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING,
715715
IndicesService.CLUSTER_INDEX_RESTRICT_REPLICATION_TYPE_SETTING,
716-
IndicesService.CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING,
716+
IndicesService.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING,
717+
IndicesService.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING,
717718

718719
// Admission Control Settings
719720
AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE,

server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java

+17-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import static java.util.Collections.unmodifiableMap;
2424
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
2525
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
26+
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding;
27+
import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64;
2628

2729
/**
2830
* This class contains the different enums related to remote store like data categories and types, path types
@@ -216,13 +218,26 @@ public static PathType parseString(String pathType) {
216218
@PublicApi(since = "2.14.0")
217219
public enum PathHashAlgorithm {
218220

219-
FNV_1A(0) {
221+
FNV_1A_BASE64(0) {
220222
@Override
221223
String hash(PathInput pathInput) {
222224
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
223225
.getName();
224226
long hash = FNV1a.hash64(input);
225-
return RemoteStoreUtils.longToUrlBase64(hash);
227+
return longToUrlBase64(hash);
228+
}
229+
},
230+
/**
231+
* This hash algorithm generates a hash value that uses the first 6 bits to create a base64 character and the next 14
232+
* bits to create binary string.
233+
*/
234+
FNV_1A_COMPOSITE_1(1) {
235+
@Override
236+
String hash(PathInput pathInput) {
237+
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
238+
.getName();
239+
long hash = FNV1a.hash64(input);
240+
return longToCompositeBase64AndBinaryEncoding(hash, 20);
226241
}
227242
};
228243

server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java

+11-3
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,16 @@ public class RemoteStorePathStrategyResolver {
2525

2626
private volatile PathType type;
2727

28+
private volatile PathHashAlgorithm hashAlgorithm;
29+
2830
private final Supplier<Version> minNodeVersionSupplier;
2931

3032
public RemoteStorePathStrategyResolver(ClusterSettings clusterSettings, Supplier<Version> minNodeVersionSupplier) {
3133
this.minNodeVersionSupplier = minNodeVersionSupplier;
32-
type = clusterSettings.get(IndicesService.CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING);
33-
clusterSettings.addSettingsUpdateConsumer(IndicesService.CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING, this::setType);
34+
type = clusterSettings.get(IndicesService.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING);
35+
hashAlgorithm = clusterSettings.get(IndicesService.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING);
36+
clusterSettings.addSettingsUpdateConsumer(IndicesService.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING, this::setType);
37+
clusterSettings.addSettingsUpdateConsumer(IndicesService.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, this::setHashAlgorithm);
3438
}
3539

3640
public RemoteStorePathStrategy get() {
@@ -39,11 +43,15 @@ public RemoteStorePathStrategy get() {
3943
// Min node version check ensures that we are enabling the new prefix type only when all the nodes understand it.
4044
pathType = Version.CURRENT.compareTo(minNodeVersionSupplier.get()) <= 0 ? type : PathType.FIXED;
4145
// If the path type is fixed, hash algorithm is not applicable.
42-
pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A;
46+
pathHashAlgorithm = pathType == PathType.FIXED ? null : hashAlgorithm;
4347
return new RemoteStorePathStrategy(pathType, pathHashAlgorithm);
4448
}
4549

4650
/**
 * Update callback registered in the constructor for {@code IndicesService.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING}.
 * Overwrites the volatile {@code type} field that {@code get()} reads when building the path strategy.
 *
 * @param type the new path type taken from the cluster setting
 */
private void setType(PathType type) {
4751
this.type = type;
4852
}
53+
54+
/**
 * Update callback registered in the constructor for
 * {@code IndicesService.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING}. Overwrites the volatile
 * {@code hashAlgorithm} field; {@code get()} uses that field only when the resolved path type is not
 * {@code PathType.FIXED}.
 *
 * @param hashAlgorithm the new path hash algorithm taken from the cluster setting
 */
private void setHashAlgorithm(PathHashAlgorithm hashAlgorithm) {
55+
this.hashAlgorithm = hashAlgorithm;
56+
}
4957
}

server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java

+33-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import java.util.Base64;
1616
import java.util.HashMap;
1717
import java.util.List;
18+
import java.util.Locale;
1819
import java.util.Map;
1920
import java.util.function.Function;
2021

@@ -26,10 +27,16 @@
2627
public class RemoteStoreUtils {
2728
public static final int LONG_MAX_LENGTH = String.valueOf(Long.MAX_VALUE).length();
2829

30+
/**
31+
* URL safe base 64 character set. This must not be changed as this is used in deriving the base64 equivalent of binary.
32+
*/
33+
static final char[] URL_BASE64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();
34+
2935
/**
3036
* This method subtracts given numbers from Long.MAX_VALUE and returns a string representation of the result.
3137
* The resultant string is guaranteed to be of the same length that of Long.MAX_VALUE. If shorter, we add left padding
3238
* of 0s to the string.
39+
*
3340
* @param num number to get the inverted long string for
3441
* @return String value of Long.MAX_VALUE - num
3542
*/
@@ -46,6 +53,7 @@ public static String invertLong(long num) {
4653

4754
/**
4855
* This method converts the given string into long and subtracts it from Long.MAX_VALUE
56+
*
4957
* @param str long in string format to be inverted
5058
* @return long value of the invert result
5159
*/
@@ -59,6 +67,7 @@ public static long invertLong(String str) {
5967

6068
/**
6169
* Extracts the segment name from the provided segment file name
70+
*
6271
* @param filename Segment file name to parse
6372
* @return Name of the segment that the segment file belongs to
6473
*/
@@ -79,10 +88,9 @@ public static String getSegmentName(String filename) {
7988
}
8089

8190
/**
82-
*
8391
* @param mdFiles List of segment/translog metadata files
84-
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
85-
* fn returns null if node id is not part of the file name
92+
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
93+
* fn returns null if node id is not part of the file name
8694
*/
8795
public static void verifyNoMultipleWriters(List<String> mdFiles, Function<String, Tuple<String, String>> fn) {
8896
Map<String, String> nodesByPrimaryTermAndGen = new HashMap<>();
@@ -116,4 +124,26 @@ static String longToUrlBase64(long value) {
116124
String base64Str = Base64.getUrlEncoder().encodeToString(hashBytes);
117125
return base64Str.substring(0, base64Str.length() - 1);
118126
}
127+
128+
static long urlBase64ToLong(String base64Str) {
129+
byte[] hashBytes = Base64.getUrlDecoder().decode(base64Str);
130+
return ByteBuffer.wrap(hashBytes).getLong();
131+
}
132+
133+
/**
134+
* Converts an input hash which occupies 64 bits of memory into a composite encoded string. The string will have 2 parts -
135+
* 1. Base 64 string and 2. Binary String. We will use the first 6 bits for creating the base 64 string.
136+
* For the second part, the rest of the bits (of length {@code len}-6) will be used as is in string form.
137+
*/
138+
static String longToCompositeBase64AndBinaryEncoding(long value, int len) {
139+
if (len < 7 || len > 64) {
140+
throw new IllegalArgumentException("In longToCompositeBase64AndBinaryEncoding, len must be between 7 and 64 (both inclusive)");
141+
}
142+
String binaryEncoding = String.format(Locale.ROOT, "%64s", Long.toBinaryString(value)).replace(' ', '0');
143+
String base64Part = binaryEncoding.substring(0, 6);
144+
String binaryPart = binaryEncoding.substring(6, len);
145+
int base64DecimalValue = Integer.valueOf(base64Part, 2);
146+
assert base64DecimalValue >= 0 && base64DecimalValue < 64;
147+
return URL_BASE64_CHARSET[base64DecimalValue] + binaryPart;
148+
}
119149
}

server/src/main/java/org/opensearch/indices/IndicesService.java

+17-3
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
import org.opensearch.index.query.QueryRewriteContext;
125125
import org.opensearch.index.recovery.RecoveryStats;
126126
import org.opensearch.index.refresh.RefreshStats;
127+
import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm;
127128
import org.opensearch.index.remote.RemoteStoreEnums.PathType;
128129
import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory;
129130
import org.opensearch.index.search.stats.SearchStats;
@@ -307,17 +308,30 @@ public class IndicesService extends AbstractLifecycleComponent
307308
);
308309

309310
/**
310-
* This setting is used to set the remote store blob store path prefix strategy. This setting is effective only for
311+
* This setting is used to set the remote store blob store path type strategy. This setting is effective only for
311312
* remote store enabled cluster.
312313
*/
313-
public static final Setting<PathType> CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING = new Setting<>(
314-
"cluster.remote_store.index.path.prefix.type",
314+
public static final Setting<PathType> CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING = new Setting<>(
315+
"cluster.remote_store.index.path.type",
315316
PathType.FIXED.toString(),
316317
PathType::parseString,
317318
Property.NodeScope,
318319
Property.Dynamic
319320
);
320321

322+
/**
323+
* This setting is used to set the remote store blob store path hash algorithm strategy. This setting is effective only for
324+
* remote store enabled cluster. This setting takes effect only if the {@link #CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING}
325+
* is either {@code HASHED_PREFIX} or {@code HASHED_INFIX}.
326+
*/
327+
public static final Setting<PathHashAlgorithm> CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING = new Setting<>(
328+
"cluster.remote_store.index.path.hash_algorithm",
329+
PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString(),
330+
PathHashAlgorithm::parseString,
331+
Property.NodeScope,
332+
Property.Dynamic
333+
);
334+
321335
/**
322336
* The node's settings.
323337
*/

server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -1711,7 +1711,7 @@ public void testRemoteCustomData() {
17111711
validateRemoteCustomData(
17121712
indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY),
17131713
PathHashAlgorithm.NAME,
1714-
PathHashAlgorithm.FNV_1A.name()
1714+
PathHashAlgorithm.FNV_1A_COMPOSITE_1.name()
17151715
);
17161716
}
17171717

@@ -1720,7 +1720,7 @@ private IndexMetadata testRemoteCustomData(boolean remoteStoreEnabled, PathType
17201720
if (remoteStoreEnabled) {
17211721
settingsBuilder.put(NODE_ATTRIBUTES.getKey() + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "test");
17221722
}
1723-
settingsBuilder.put(IndicesService.CLUSTER_REMOTE_STORE_PATH_PREFIX_TYPE_SETTING.getKey(), pathType.toString());
1723+
settingsBuilder.put(IndicesService.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), pathType.toString());
17241724
Settings settings = settingsBuilder.build();
17251725

17261726
ClusterService clusterService = mock(ClusterService.class);

0 commit comments

Comments
 (0)