From 5a942ffe17b3be3d0ca44753d73f419b25c7d152 Mon Sep 17 00:00:00 2001 From: Sachin Kale <kalsac@amazon.com> Date: Sun, 30 Jun 2024 17:43:51 +0530 Subject: [PATCH 1/3] Fix assertion failure while deleting remote backed index Signed-off-by: Sachin Kale <kalsac@amazon.com> --- .../RemoteMigrationIndexMetadataUpdateIT.java | 2 -- .../opensearch/remotestore/RemoteStoreIT.java | 14 +++++++++- .../org/opensearch/index/IndexService.java | 26 ++++++++++++++++++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java index 6885d37c4aab0..216c104dfecc1 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java @@ -275,7 +275,6 @@ initalMetadataVersion < internalCluster().client() * After shard relocation completes, shuts down the docrep nodes and asserts remote * index settings are applied even when the index is in YELLOW state */ - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13737") public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Exception { internalCluster().startClusterManagerOnlyNode(); @@ -332,7 +331,6 @@ public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Except * After shard relocation completes, restarts the docrep node holding extra replica shard copy * and asserts remote index settings are applied as soon as the docrep replica copy is unassigned */ - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13871") public void testIndexSettingsUpdatedWhenDocrepNodeIsRestarted() throws Exception { internalCluster().startClusterManagerOnlyNode(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 96d6338e5913b..3f10cda605bc5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -65,7 +65,6 @@ import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; import static org.opensearch.index.shard.IndexShardTestCase.getTranslog; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; -import static org.opensearch.test.OpenSearchTestCase.getShardLevelBlobPath; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.comparesEqualTo; @@ -133,6 +132,19 @@ private void testPeerRecovery(int numberOfIterations, boolean invokeFlush) throw ); } + public void testRemoteStoreIndexCreationAndDeletion() throws InterruptedException, ExecutionException { + String dataNode = internalCluster().startNodes(1).get(0); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); + ensureYellowAndNoInitializingShards(INDEX_NAME); + ensureGreen(INDEX_NAME); + + IndexShard indexShard = getIndexShard(dataNode, INDEX_NAME); + + indexShard.store().incRef(); + assertAcked(client().admin().indices().prepareDelete(INDEX_NAME)); + indexShard.store().decRef(); + } + public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogNoDataFlush() throws Exception { testPeerRecovery(1, true); } diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index a7849bcf80474..83e40b2716746 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -597,7 +597,14 @@ public synchronized IndexShard createShard( this.indexSettings.getRemoteStorePathStrategy() ); } - remoteStore = new Store(shardId, this.indexSettings, remoteDirectory, lock, Store.OnClose.EMPTY, path); + remoteStore = new Store( + shardId, + this.indexSettings, + remoteDirectory, + getRemoteStoreLock(shardId), + Store.OnClose.EMPTY, + path + ); } else { // Disallow shards with remote store based settings to be created on non-remote store enabled nodes // Even though we have `RemoteStoreMigrationAllocationDecider` in place to prevent something like this from happening at the @@ -678,6 +685,23 @@ public synchronized IndexShard createShard( } } + // When an instance of Store is created, a shardlock is created which is released on closing the instance of store. + // Currently, we create 2 instances of store for remote store backed indices: store and remoteStore. + // As there can be only one shardlock acquired for a given shard, the lock is shared between store and remoteStore. + // This creates an issue when we are deleting the index as it results in closing both store and remoteStore. + // Sample test failure: https://github.com/opensearch-project/OpenSearch/issues/13871 + // The following method provides ShardLock that is not maintained by NodeEnvironment. + // As part of https://github.com/opensearch-project/OpenSearch/issues/13075, we want to move away from keeping 2 + // store instances. + private ShardLock getRemoteStoreLock(ShardId shardId) { + return new ShardLock(shardId) { + @Override + protected void closeInternal() { + // Ignore for remote store + } + }; + } + /* Fetches the shard path based on the index type - For a remote snapshot index, the cache path is used to initialize the shards. From 299dd90bdf7889f1402c283586f4ec2a93dda83c Mon Sep 17 00:00:00 2001 From: Sachin Kale <kalsac@amazon.com> Date: Mon, 1 Jul 2024 11:42:14 +0530 Subject: [PATCH 2/3] Add details to the test Signed-off-by: Sachin Kale <kalsac@amazon.com> --- .../java/org/opensearch/remotestore/RemoteStoreIT.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 3f10cda605bc5..194dce5f4a57a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -132,7 +132,7 @@ private void testPeerRecovery(int numberOfIterations, boolean invokeFlush) throw ); } - public void testRemoteStoreIndexCreationAndDeletion() throws InterruptedException, ExecutionException { + public void testRemoteStoreIndexCreationAndDeletionWithReferencedStore() throws InterruptedException, ExecutionException { String dataNode = internalCluster().startNodes(1).get(0); createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); ensureYellowAndNoInitializingShards(INDEX_NAME); @@ -140,6 +140,8 @@ public void testRemoteStoreIndexCreationAndDeletion() throws InterruptedExceptio IndexShard indexShard = getIndexShard(dataNode, INDEX_NAME); + // Simulating a condition where store is already in use by increasing ref count, this helps in testing index + // deletion when refresh is in-progress. indexShard.store().incRef(); assertAcked(client().admin().indices().prepareDelete(INDEX_NAME)); indexShard.store().decRef(); From 736aa636f463457267b61d698be9456e907a39fb Mon Sep 17 00:00:00 2001 From: Sachin Kale <kalsac@amazon.com> Date: Tue, 2 Jul 2024 09:13:20 +0530 Subject: [PATCH 3/3] Move remote store shard lock creation inline Signed-off-by: Sachin Kale <kalsac@amazon.com> --- .../org/opensearch/index/IndexService.java | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 83e40b2716746..6fc29e3bfbffd 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -597,14 +597,21 @@ public synchronized IndexShard createShard( this.indexSettings.getRemoteStorePathStrategy() ); } - remoteStore = new Store( - shardId, - this.indexSettings, - remoteDirectory, - getRemoteStoreLock(shardId), - Store.OnClose.EMPTY, - path - ); + // When an instance of Store is created, a shardlock is created which is released on closing the instance of store. + // Currently, we create 2 instances of store for remote store backed indices: store and remoteStore. + // As there can be only one shardlock acquired for a given shard, the lock is shared between store and remoteStore. + // This creates an issue when we are deleting the index as it results in closing both store and remoteStore. + // Sample test failure: https://github.com/opensearch-project/OpenSearch/issues/13871 + // The following method provides ShardLock that is not maintained by NodeEnvironment. + // As part of https://github.com/opensearch-project/OpenSearch/issues/13075, we want to move away from keeping 2 + // store instances. + ShardLock remoteStoreLock = new ShardLock(shardId) { + @Override + protected void closeInternal() { + // Do nothing for shard lock on remote store + } + }; + remoteStore = new Store(shardId, this.indexSettings, remoteDirectory, remoteStoreLock, Store.OnClose.EMPTY, path); } else { // Disallow shards with remote store based settings to be created on non-remote store enabled nodes // Even though we have `RemoteStoreMigrationAllocationDecider` in place to prevent something like this from happening at the @@ -627,7 +634,6 @@ public synchronized IndexShard createShard( } else { directory = directoryFactory.newDirectory(this.indexSettings, path); } - store = new Store( shardId, this.indexSettings, @@ -685,23 +691,6 @@ public synchronized IndexShard createShard( } } - // When an instance of Store is created, a shardlock is created which is released on closing the instance of store. - // Currently, we create 2 instances of store for remote store backed indices: store and remoteStore. - // As there can be only one shardlock acquired for a given shard, the lock is shared between store and remoteStore. - // This creates an issue when we are deleting the index as it results in closing both store and remoteStore. - // Sample test failure: https://github.com/opensearch-project/OpenSearch/issues/13871 - // The following method provides ShardLock that is not maintained by NodeEnvironment. - // As part of https://github.com/opensearch-project/OpenSearch/issues/13075, we want to move away from keeping 2 - // store instances. - private ShardLock getRemoteStoreLock(ShardId shardId) { - return new ShardLock(shardId) { - @Override - protected void closeInternal() { - // Ignore for remote store - } - }; - } - /* Fetches the shard path based on the index type - For a remote snapshot index, the cache path is used to initialize the shards.