|
12 | 12 | import org.apache.logging.log4j.Logger;
|
13 | 13 | import org.opensearch.cluster.ClusterState;
|
14 | 14 | import org.opensearch.cluster.metadata.DiffableStringMap;
|
| 15 | +import org.opensearch.common.CheckedFunction; |
15 | 16 | import org.opensearch.common.io.stream.BytesStreamOutput;
|
16 | 17 | import org.opensearch.common.settings.Settings;
|
| 18 | +import org.opensearch.common.unit.TimeValue; |
17 | 19 | import org.opensearch.core.common.io.stream.StreamInput;
|
18 | 20 | import org.opensearch.core.common.io.stream.StreamOutput;
|
19 | 21 | import org.opensearch.core.common.io.stream.Writeable;
|
|
22 | 24 | import org.opensearch.core.xcontent.XContentParseException;
|
23 | 25 | import org.opensearch.core.xcontent.XContentParser;
|
24 | 26 | import org.opensearch.index.translog.BufferedChecksumStreamOutput;
|
| 27 | +import org.opensearch.threadpool.ThreadPool; |
25 | 28 |
|
26 | 29 | import java.io.IOException;
|
27 | 30 | import java.util.ArrayList;
|
28 | 31 | import java.util.List;
|
29 | 32 | import java.util.Objects;
|
| 33 | +import java.util.concurrent.CountDownLatch; |
| 34 | +import java.util.concurrent.ExecutorService; |
| 35 | +import java.util.function.Consumer; |
30 | 36 |
|
31 | 37 | import com.jcraft.jzlib.JZlib;
|
32 | 38 |
|
|
37 | 43 | */
|
38 | 44 | public class ClusterStateChecksum implements ToXContentFragment, Writeable {
|
39 | 45 |
|
| 46 | + public static final int COMPONENT_SIZE = 11; |
40 | 47 | static final String ROUTING_TABLE_CS = "routing_table";
|
41 | 48 | static final String NODES_CS = "discovery_nodes";
|
42 | 49 | static final String BLOCKS_CS = "blocks";
|
@@ -65,62 +72,103 @@ public class ClusterStateChecksum implements ToXContentFragment, Writeable {
|
65 | 72 | long indicesChecksum;
|
66 | 73 | long clusterStateChecksum;
|
67 | 74 |
|
68 |
| - public ClusterStateChecksum(ClusterState clusterState) { |
69 |
| - try ( |
70 |
| - BytesStreamOutput out = new BytesStreamOutput(); |
71 |
| - BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out) |
72 |
| - ) { |
73 |
| - clusterState.routingTable().writeVerifiableTo(checksumOut); |
74 |
| - routingTableChecksum = checksumOut.getChecksum(); |
75 |
| - |
76 |
| - checksumOut.reset(); |
77 |
| - clusterState.nodes().writeVerifiableTo(checksumOut); |
78 |
| - nodesChecksum = checksumOut.getChecksum(); |
79 |
| - |
80 |
| - checksumOut.reset(); |
81 |
| - clusterState.coordinationMetadata().writeVerifiableTo(checksumOut); |
82 |
| - coordinationMetadataChecksum = checksumOut.getChecksum(); |
83 |
| - |
84 |
| - // Settings create sortedMap by default, so no explicit sorting required here. |
85 |
| - checksumOut.reset(); |
86 |
| - Settings.writeSettingsToStream(clusterState.metadata().persistentSettings(), checksumOut); |
87 |
| - settingMetadataChecksum = checksumOut.getChecksum(); |
88 |
| - |
89 |
| - checksumOut.reset(); |
90 |
| - Settings.writeSettingsToStream(clusterState.metadata().transientSettings(), checksumOut); |
91 |
| - transientSettingsMetadataChecksum = checksumOut.getChecksum(); |
92 |
| - |
93 |
| - checksumOut.reset(); |
94 |
| - clusterState.metadata().templatesMetadata().writeVerifiableTo(checksumOut); |
95 |
| - templatesMetadataChecksum = checksumOut.getChecksum(); |
96 |
| - |
97 |
| - checksumOut.reset(); |
98 |
| - checksumOut.writeStringCollection(clusterState.metadata().customs().keySet()); |
99 |
| - customMetadataMapChecksum = checksumOut.getChecksum(); |
100 |
| - |
101 |
| - checksumOut.reset(); |
102 |
| - ((DiffableStringMap) clusterState.metadata().hashesOfConsistentSettings()).writeTo(checksumOut); |
103 |
| - hashesOfConsistentSettingsChecksum = checksumOut.getChecksum(); |
104 |
| - |
105 |
| - checksumOut.reset(); |
106 |
| - checksumOut.writeMapValues( |
| 75 | + public ClusterStateChecksum(ClusterState clusterState, ThreadPool threadpool) { |
| 76 | + long start = threadpool.relativeTimeInNanos(); |
| 77 | + ExecutorService executorService = threadpool.executor(ThreadPool.Names.REMOTE_STATE_CHECKSUM); |
| 78 | + CountDownLatch latch = new CountDownLatch(COMPONENT_SIZE); |
| 79 | + |
| 80 | + executeChecksumTask((stream) -> { |
| 81 | + clusterState.routingTable().writeVerifiableTo(stream); |
| 82 | + return null; |
| 83 | + }, checksum -> routingTableChecksum = checksum, executorService, latch); |
| 84 | + |
| 85 | + executeChecksumTask((stream) -> { |
| 86 | + clusterState.nodes().writeVerifiableTo(stream); |
| 87 | + return null; |
| 88 | + }, checksum -> nodesChecksum = checksum, executorService, latch); |
| 89 | + |
| 90 | + executeChecksumTask((stream) -> { |
| 91 | + clusterState.coordinationMetadata().writeVerifiableTo(stream); |
| 92 | + return null; |
| 93 | + }, checksum -> coordinationMetadataChecksum = checksum, executorService, latch); |
| 94 | + |
| 95 | + executeChecksumTask((stream) -> { |
| 96 | + Settings.writeSettingsToStream(clusterState.metadata().persistentSettings(), stream); |
| 97 | + return null; |
| 98 | + }, checksum -> settingMetadataChecksum = checksum, executorService, latch); |
| 99 | + |
| 100 | + executeChecksumTask((stream) -> { |
| 101 | + Settings.writeSettingsToStream(clusterState.metadata().transientSettings(), stream); |
| 102 | + return null; |
| 103 | + }, checksum -> transientSettingsMetadataChecksum = checksum, executorService, latch); |
| 104 | + |
| 105 | + executeChecksumTask((stream) -> { |
| 106 | + clusterState.metadata().templatesMetadata().writeVerifiableTo(stream); |
| 107 | + return null; |
| 108 | + }, checksum -> templatesMetadataChecksum = checksum, executorService, latch); |
| 109 | + |
| 110 | + executeChecksumTask((stream) -> { |
| 111 | + stream.writeStringCollection(clusterState.metadata().customs().keySet()); |
| 112 | + return null; |
| 113 | + }, checksum -> customMetadataMapChecksum = checksum, executorService, latch); |
| 114 | + |
| 115 | + executeChecksumTask((stream) -> { |
| 116 | + ((DiffableStringMap) clusterState.metadata().hashesOfConsistentSettings()).writeTo(stream); |
| 117 | + return null; |
| 118 | + }, checksum -> hashesOfConsistentSettingsChecksum = checksum, executorService, latch); |
| 119 | + |
| 120 | + executeChecksumTask((stream) -> { |
| 121 | + stream.writeMapValues( |
107 | 122 | clusterState.metadata().indices(),
|
108 |
| - (stream, value) -> value.writeVerifiableTo((BufferedChecksumStreamOutput) stream) |
| 123 | + (checksumStream, value) -> value.writeVerifiableTo((BufferedChecksumStreamOutput) checksumStream) |
109 | 124 | );
|
110 |
| - indicesChecksum = checksumOut.getChecksum(); |
111 |
| - |
112 |
| - checksumOut.reset(); |
113 |
| - clusterState.blocks().writeVerifiableTo(checksumOut); |
114 |
| - blocksChecksum = checksumOut.getChecksum(); |
115 |
| - |
116 |
| - checksumOut.reset(); |
117 |
| - checksumOut.writeStringCollection(clusterState.customs().keySet()); |
118 |
| - clusterStateCustomsChecksum = checksumOut.getChecksum(); |
119 |
| - } catch (IOException e) { |
120 |
| - logger.error("Failed to create checksum for cluster state.", e); |
| 125 | + return null; |
| 126 | + }, checksum -> indicesChecksum = checksum, executorService, latch); |
| 127 | + |
| 128 | + executeChecksumTask((stream) -> { |
| 129 | + clusterState.blocks().writeVerifiableTo(stream); |
| 130 | + return null; |
| 131 | + }, checksum -> blocksChecksum = checksum, executorService, latch); |
| 132 | + |
| 133 | + executeChecksumTask((stream) -> { |
| 134 | + stream.writeStringCollection(clusterState.customs().keySet()); |
| 135 | + return null; |
| 136 | + }, checksum -> clusterStateCustomsChecksum = checksum, executorService, latch); |
| 137 | + |
| 138 | + try { |
| 139 | + latch.await(); |
| 140 | + } catch (InterruptedException e) { |
121 | 141 | throw new RemoteStateTransferException("Failed to create checksum for cluster state.", e);
|
122 | 142 | }
|
123 | 143 | createClusterStateChecksum();
|
| 144 | + logger.debug("Checksum execution time {}", TimeValue.nsecToMSec(threadpool.relativeTimeInNanos() - start)); |
| 145 | + } |
| 146 | + |
| 147 | + private void executeChecksumTask( |
| 148 | + CheckedFunction<BufferedChecksumStreamOutput, Void, IOException> checksumTask, |
| 149 | + Consumer<Long> checksumConsumer, |
| 150 | + ExecutorService executorService, |
| 151 | + CountDownLatch latch |
| 152 | + ) { |
| 153 | + executorService.execute(() -> { |
| 154 | + try { |
| 155 | + long checksum = createChecksum(checksumTask); |
| 156 | + checksumConsumer.accept(checksum); |
| 157 | + latch.countDown(); |
| 158 | + } catch (IOException e) { |
| 159 | + throw new RemoteStateTransferException("Failed to execute checksum task", e); |
| 160 | + } |
| 161 | + }); |
| 162 | + } |
| 163 | + |
| 164 | + private long createChecksum(CheckedFunction<BufferedChecksumStreamOutput, Void, IOException> task) throws IOException { |
| 165 | + try ( |
| 166 | + BytesStreamOutput out = new BytesStreamOutput(); |
| 167 | + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out) |
| 168 | + ) { |
| 169 | + task.apply(checksumOut); |
| 170 | + return checksumOut.getChecksum(); |
| 171 | + } |
124 | 172 | }
|
125 | 173 |
|
126 | 174 | private void createClusterStateChecksum() {
|
|
0 commit comments