Skip to content

Commit 9e9ab6b

Browse files
[Remote Store] Add capability of doing flush as determined by the translog (opensearch-project#12992)
Signed-off-by: Shubh Sahu <shubhvs@amazon.com>
1 parent c055a3d commit 9e9ab6b

File tree

10 files changed

+121
-2
lines changed

10 files changed

+121
-2
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2424
- Add an individual setting of rate limiter for segment replication ([#12959](https://github.com/opensearch-project/OpenSearch/pull/12959))
2525
- [Streaming Indexing] Ensure support of the new transport by security plugin ([#13174](https://github.com/opensearch-project/OpenSearch/pull/13174))
2626
- Add cluster setting to dynamically configure the buckets for filter rewrite optimization. ([#13179](https://github.com/opensearch-project/OpenSearch/pull/13179))
27+
- [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992))
2728

2829
### Dependencies
2930
- Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896))

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java

+43
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.opensearch.index.IndexSettings;
3131
import org.opensearch.index.shard.IndexShard;
3232
import org.opensearch.index.shard.IndexShardClosedException;
33+
import org.opensearch.index.translog.Translog;
3334
import org.opensearch.index.translog.Translog.Durability;
3435
import org.opensearch.indices.IndicesService;
3536
import org.opensearch.indices.RemoteStoreSettings;
@@ -63,6 +64,7 @@
6364
import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG;
6465
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
6566
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
67+
import static org.opensearch.index.shard.IndexShardTestCase.getTranslog;
6668
import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING;
6769
import static org.opensearch.test.OpenSearchTestCase.getShardLevelBlobPath;
6870
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
@@ -859,4 +861,45 @@ public void testLocalOnlyTranslogCleanupOnNodeRestart() throws Exception {
859861
refresh(INDEX_NAME);
860862
assertHitCount(client(dataNode).prepareSearch(INDEX_NAME).setSize(0).get(), searchableDocs + 15);
861863
}
864+
865+
public void testFlushOnTooManyRemoteTranslogFiles() throws Exception {
866+
internalCluster().startClusterManagerOnlyNode();
867+
String datanode = internalCluster().startDataOnlyNodes(1).get(0);
868+
createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 10000L, -1));
869+
ensureGreen(INDEX_NAME);
870+
871+
ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest();
872+
updateSettingsRequest.persistentSettings(
873+
Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "100")
874+
);
875+
assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet());
876+
877+
IndexShard indexShard = getIndexShard(datanode, INDEX_NAME);
878+
Path translogLocation = getTranslog(indexShard).location();
879+
assertFalse(indexShard.shouldPeriodicallyFlush());
880+
881+
try (Stream<Path> files = Files.list(translogLocation)) {
882+
long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count();
883+
assertEquals(totalFiles, 1L);
884+
}
885+
886+
// indexing 100 documents (100 bulk requests), no flush will be triggered yet
887+
for (int i = 0; i < 100; i++) {
888+
indexBulk(INDEX_NAME, 1);
889+
}
890+
891+
try (Stream<Path> files = Files.list(translogLocation)) {
892+
long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count();
893+
assertEquals(totalFiles, 101L);
894+
}
895+
// Will flush and trim the translog readers
896+
indexBulk(INDEX_NAME, 1);
897+
898+
assertBusy(() -> {
899+
try (Stream<Path> files = Files.list(translogLocation)) {
900+
long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count();
901+
assertEquals(totalFiles, 1L);
902+
}
903+
}, 30, TimeUnit.SECONDS);
904+
}
862905
}

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,8 @@ public void apply(Settings value, Settings current, Settings previous) {
732732
RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING,
733733
RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_TRANSFER_TIMEOUT_SETTING,
734734
RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING,
735-
RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING
735+
RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING,
736+
RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS
736737
)
737738
)
738739
);

server/src/main/java/org/opensearch/index/shard/IndexShard.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -2917,7 +2917,7 @@ public void restoreFromRepository(Repository repository, ActionListener<Boolean>
29172917
*
29182918
* @return {@code true} if the engine should be flushed
29192919
*/
2920-
boolean shouldPeriodicallyFlush() {
2920+
public boolean shouldPeriodicallyFlush() {
29212921
final Engine engine = getEngineOrNull();
29222922
if (engine != null) {
29232923
try {
@@ -4493,6 +4493,7 @@ public Durability getTranslogDurability() {
44934493
/**
44944494
* Schedules a flush or translog generation roll if needed but will not schedule more than one concurrently. The operation will be
44954495
* executed asynchronously on the flush thread pool.
4496+
* Can also schedule a flush if decided by translog manager
44964497
*/
44974498
public void afterWriteOperation() {
44984499
if (shouldPeriodicallyFlush() || shouldRollTranslogGeneration()) {

server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java

+6
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,12 @@ public String getTranslogUUID() {
437437
* @return if the translog should be flushed
438438
*/
439439
public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) {
440+
/*
441+
* This can trigger flush depending upon translog's implementation
442+
*/
443+
if (translog.shouldFlush()) {
444+
return true;
445+
}
440446
// This is the minimum seqNo that is referred in translog and considered for calculating translog size
441447
long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1);
442448
final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration;

server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java

+11
Original file line numberDiff line numberDiff line change
@@ -675,4 +675,15 @@ public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommi
675675
int availablePermits() {
676676
return syncPermit.availablePermits();
677677
}
678+
679+
/**
680+
* Checks whether or not the shard should be flushed based on translog files.
681+
* This checks if number of translog files breaches the threshold count determined by
682+
* {@code cluster.remote_store.translog.max_readers} setting
683+
* @return {@code true} if the shard should be flushed
684+
*/
685+
@Override
686+
protected boolean shouldFlush() {
687+
return readers.size() >= translogTransferManager.getMaxRemoteTranslogReadersSettings();
688+
}
678689
}

server/src/main/java/org/opensearch/index/translog/Translog.java

+9
Original file line numberDiff line numberDiff line change
@@ -2082,4 +2082,13 @@ public static String createEmptyTranslog(
20822082
public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) {
20832083
return minUnrefCheckpointInLastCommit;
20842084
}
2085+
2086+
/**
2087+
* Checks whether or not the shard should be flushed based on translog files.
2088+
* each translog type can have it's own decider
2089+
* @return {@code true} if the shard should be flushed
2090+
*/
2091+
protected boolean shouldFlush() {
2092+
return false;
2093+
}
20852094
}

server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java

+4
Original file line numberDiff line numberDiff line change
@@ -585,4 +585,8 @@ public void onFailure(Exception e) {
585585
throw e;
586586
}
587587
}
588+
589+
public int getMaxRemoteTranslogReadersSettings() {
590+
return this.remoteStoreSettings.getMaxRemoteTranslogReaders();
591+
}
588592
}

server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java

+23
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,23 @@ public class RemoteStoreSettings {
9494
Property.Dynamic
9595
);
9696

97+
/**
98+
* Controls the maximum referenced remote translog files. If breached the shard will be flushed.
99+
*/
100+
public static final Setting<Integer> CLUSTER_REMOTE_MAX_TRANSLOG_READERS = Setting.intSetting(
101+
"cluster.remote_store.translog.max_readers",
102+
1000,
103+
100,
104+
Property.Dynamic,
105+
Property.NodeScope
106+
);
107+
97108
private volatile TimeValue clusterRemoteTranslogBufferInterval;
98109
private volatile int minRemoteSegmentMetadataFiles;
99110
private volatile TimeValue clusterRemoteTranslogTransferTimeout;
100111
private volatile RemoteStoreEnums.PathType pathType;
101112
private volatile RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm;
113+
private volatile int maxRemoteTranslogReaders;
102114

103115
public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) {
104116
clusterRemoteTranslogBufferInterval = CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(settings);
@@ -124,6 +136,9 @@ public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) {
124136

125137
pathHashAlgorithm = clusterSettings.get(CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING);
126138
clusterSettings.addSettingsUpdateConsumer(CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, this::setPathHashAlgorithm);
139+
140+
maxRemoteTranslogReaders = CLUSTER_REMOTE_MAX_TRANSLOG_READERS.get(settings);
141+
clusterSettings.addSettingsUpdateConsumer(CLUSTER_REMOTE_MAX_TRANSLOG_READERS, this::setMaxRemoteTranslogReaders);
127142
}
128143

129144
public TimeValue getClusterRemoteTranslogBufferInterval() {
@@ -167,4 +182,12 @@ private void setPathType(RemoteStoreEnums.PathType pathType) {
167182
private void setPathHashAlgorithm(RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm) {
168183
this.pathHashAlgorithm = pathHashAlgorithm;
169184
}
185+
186+
public int getMaxRemoteTranslogReaders() {
187+
return maxRemoteTranslogReaders;
188+
}
189+
190+
private void setMaxRemoteTranslogReaders(int maxRemoteTranslogReaders) {
191+
this.maxRemoteTranslogReaders = maxRemoteTranslogReaders;
192+
}
170193
}

server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java

+20
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,24 @@ public void testClusterRemoteTranslogTransferTimeout() {
9696
);
9797
assertEquals(TimeValue.timeValueSeconds(40), remoteStoreSettings.getClusterRemoteTranslogTransferTimeout());
9898
}
99+
100+
public void testMaxRemoteReferencedTranslogFiles() {
101+
// Test default value
102+
assertEquals(1000, remoteStoreSettings.getMaxRemoteTranslogReaders());
103+
104+
// Test override with valid value
105+
clusterSettings.applySettings(
106+
Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "500").build()
107+
);
108+
assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders());
109+
110+
// Test override with value less than minimum
111+
assertThrows(
112+
IllegalArgumentException.class,
113+
() -> clusterSettings.applySettings(
114+
Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "99").build()
115+
)
116+
);
117+
assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders());
118+
}
99119
}

0 commit comments

Comments
 (0)