|
12 | 12 | import org.apache.logging.log4j.Logger; |
13 | 13 | import org.opensearch.cluster.ClusterState; |
14 | 14 | import org.opensearch.cluster.metadata.DiffableStringMap; |
| 15 | +import org.opensearch.common.CheckedFunction; |
15 | 16 | import org.opensearch.common.io.stream.BytesStreamOutput; |
16 | 17 | import org.opensearch.common.settings.Settings; |
| 18 | +import org.opensearch.common.unit.TimeValue; |
17 | 19 | import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; |
18 | 20 | import org.opensearch.core.common.io.stream.StreamInput; |
19 | 21 | import org.opensearch.core.common.io.stream.StreamOutput; |
|
22 | 24 | import org.opensearch.core.xcontent.XContentBuilder; |
23 | 25 | import org.opensearch.core.xcontent.XContentParseException; |
24 | 26 | import org.opensearch.core.xcontent.XContentParser; |
| 27 | +import org.opensearch.threadpool.ThreadPool; |
25 | 28 |
|
26 | 29 | import java.io.IOException; |
27 | 30 | import java.util.ArrayList; |
28 | 31 | import java.util.List; |
29 | 32 | import java.util.Objects; |
| 33 | +import java.util.concurrent.CountDownLatch; |
| 34 | +import java.util.concurrent.ExecutorService; |
| 35 | +import java.util.function.Consumer; |
30 | 36 |
|
31 | 37 | import com.jcraft.jzlib.JZlib; |
32 | 38 |
|
|
37 | 43 | */ |
38 | 44 | public class ClusterStateChecksum implements ToXContentFragment, Writeable { |
39 | 45 |
|
| 46 | + public static final int COMPONENT_SIZE = 11; |
40 | 47 | static final String ROUTING_TABLE_CS = "routing_table"; |
41 | 48 | static final String NODES_CS = "discovery_nodes"; |
42 | 49 | static final String BLOCKS_CS = "blocks"; |
@@ -65,62 +72,103 @@ public class ClusterStateChecksum implements ToXContentFragment, Writeable { |
65 | 72 | long indicesChecksum; |
66 | 73 | long clusterStateChecksum; |
67 | 74 |
|
68 | | - public ClusterStateChecksum(ClusterState clusterState) { |
69 | | - try ( |
70 | | - BytesStreamOutput out = new BytesStreamOutput(); |
71 | | - BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out) |
72 | | - ) { |
73 | | - clusterState.routingTable().writeVerifiableTo(checksumOut); |
74 | | - routingTableChecksum = checksumOut.getChecksum(); |
75 | | - |
76 | | - checksumOut.reset(); |
77 | | - clusterState.nodes().writeVerifiableTo(checksumOut); |
78 | | - nodesChecksum = checksumOut.getChecksum(); |
79 | | - |
80 | | - checksumOut.reset(); |
81 | | - clusterState.coordinationMetadata().writeVerifiableTo(checksumOut); |
82 | | - coordinationMetadataChecksum = checksumOut.getChecksum(); |
83 | | - |
84 | | - // Settings create sortedMap by default, so no explicit sorting required here. |
85 | | - checksumOut.reset(); |
86 | | - Settings.writeSettingsToStream(clusterState.metadata().persistentSettings(), checksumOut); |
87 | | - settingMetadataChecksum = checksumOut.getChecksum(); |
88 | | - |
89 | | - checksumOut.reset(); |
90 | | - Settings.writeSettingsToStream(clusterState.metadata().transientSettings(), checksumOut); |
91 | | - transientSettingsMetadataChecksum = checksumOut.getChecksum(); |
92 | | - |
93 | | - checksumOut.reset(); |
94 | | - clusterState.metadata().templatesMetadata().writeVerifiableTo(checksumOut); |
95 | | - templatesMetadataChecksum = checksumOut.getChecksum(); |
96 | | - |
97 | | - checksumOut.reset(); |
98 | | - checksumOut.writeStringCollection(clusterState.metadata().customs().keySet()); |
99 | | - customMetadataMapChecksum = checksumOut.getChecksum(); |
100 | | - |
101 | | - checksumOut.reset(); |
102 | | - ((DiffableStringMap) clusterState.metadata().hashesOfConsistentSettings()).writeTo(checksumOut); |
103 | | - hashesOfConsistentSettingsChecksum = checksumOut.getChecksum(); |
104 | | - |
105 | | - checksumOut.reset(); |
106 | | - checksumOut.writeMapValues( |
| 75 | + public ClusterStateChecksum(ClusterState clusterState, ThreadPool threadpool) { |
| 76 | + long start = threadpool.relativeTimeInNanos(); |
| 77 | + ExecutorService executorService = threadpool.executor(ThreadPool.Names.REMOTE_STATE_CHECKSUM); |
| 78 | + CountDownLatch latch = new CountDownLatch(COMPONENT_SIZE); |
| 79 | + |
| 80 | + executeChecksumTask((stream) -> { |
| 81 | + clusterState.routingTable().writeVerifiableTo(stream); |
| 82 | + return null; |
| 83 | + }, checksum -> routingTableChecksum = checksum, executorService, latch); |
| 84 | + |
| 85 | + executeChecksumTask((stream) -> { |
| 86 | + clusterState.nodes().writeVerifiableTo(stream); |
| 87 | + return null; |
| 88 | + }, checksum -> nodesChecksum = checksum, executorService, latch); |
| 89 | + |
| 90 | + executeChecksumTask((stream) -> { |
| 91 | + clusterState.coordinationMetadata().writeVerifiableTo(stream); |
| 92 | + return null; |
| 93 | + }, checksum -> coordinationMetadataChecksum = checksum, executorService, latch); |
| 94 | + |
| 95 | + executeChecksumTask((stream) -> { |
| 96 | + Settings.writeSettingsToStream(clusterState.metadata().persistentSettings(), stream); |
| 97 | + return null; |
| 98 | + }, checksum -> settingMetadataChecksum = checksum, executorService, latch); |
| 99 | + |
| 100 | + executeChecksumTask((stream) -> { |
| 101 | + Settings.writeSettingsToStream(clusterState.metadata().transientSettings(), stream); |
| 102 | + return null; |
| 103 | + }, checksum -> transientSettingsMetadataChecksum = checksum, executorService, latch); |
| 104 | + |
| 105 | + executeChecksumTask((stream) -> { |
| 106 | + clusterState.metadata().templatesMetadata().writeVerifiableTo(stream); |
| 107 | + return null; |
| 108 | + }, checksum -> templatesMetadataChecksum = checksum, executorService, latch); |
| 109 | + |
| 110 | + executeChecksumTask((stream) -> { |
| 111 | + stream.writeStringCollection(clusterState.metadata().customs().keySet()); |
| 112 | + return null; |
| 113 | + }, checksum -> customMetadataMapChecksum = checksum, executorService, latch); |
| 114 | + |
| 115 | + executeChecksumTask((stream) -> { |
| 116 | + ((DiffableStringMap) clusterState.metadata().hashesOfConsistentSettings()).writeTo(stream); |
| 117 | + return null; |
| 118 | + }, checksum -> hashesOfConsistentSettingsChecksum = checksum, executorService, latch); |
| 119 | + |
| 120 | + executeChecksumTask((stream) -> { |
| 121 | + stream.writeMapValues( |
107 | 122 | clusterState.metadata().indices(), |
108 | | - (stream, value) -> value.writeVerifiableTo((BufferedChecksumStreamOutput) stream) |
| 123 | + (checksumStream, value) -> value.writeVerifiableTo((BufferedChecksumStreamOutput) checksumStream) |
109 | 124 | ); |
110 | | - indicesChecksum = checksumOut.getChecksum(); |
111 | | - |
112 | | - checksumOut.reset(); |
113 | | - clusterState.blocks().writeVerifiableTo(checksumOut); |
114 | | - blocksChecksum = checksumOut.getChecksum(); |
115 | | - |
116 | | - checksumOut.reset(); |
117 | | - checksumOut.writeStringCollection(clusterState.customs().keySet()); |
118 | | - clusterStateCustomsChecksum = checksumOut.getChecksum(); |
119 | | - } catch (IOException e) { |
120 | | - logger.error("Failed to create checksum for cluster state.", e); |
| 125 | + return null; |
| 126 | + }, checksum -> indicesChecksum = checksum, executorService, latch); |
| 127 | + |
| 128 | + executeChecksumTask((stream) -> { |
| 129 | + clusterState.blocks().writeVerifiableTo(stream); |
| 130 | + return null; |
| 131 | + }, checksum -> blocksChecksum = checksum, executorService, latch); |
| 132 | + |
| 133 | + executeChecksumTask((stream) -> { |
| 134 | + stream.writeStringCollection(clusterState.customs().keySet()); |
| 135 | + return null; |
| 136 | + }, checksum -> clusterStateCustomsChecksum = checksum, executorService, latch); |
| 137 | + |
| 138 | + try { |
| 139 | + latch.await(); |
| 140 | + } catch (InterruptedException e) { |
121 | 141 | throw new RemoteStateTransferException("Failed to create checksum for cluster state.", e); |
122 | 142 | } |
123 | 143 | createClusterStateChecksum(); |
| 144 | + logger.debug("Checksum execution time {}", TimeValue.nsecToMSec(threadpool.relativeTimeInNanos() - start)); |
| 145 | + } |
| 146 | + |
| 147 | + private void executeChecksumTask( |
| 148 | + CheckedFunction<BufferedChecksumStreamOutput, Void, IOException> checksumTask, |
| 149 | + Consumer<Long> checksumConsumer, |
| 150 | + ExecutorService executorService, |
| 151 | + CountDownLatch latch |
| 152 | + ) { |
| 153 | + executorService.execute(() -> { |
| 154 | + try { |
| 155 | + long checksum = createChecksum(checksumTask); |
| 156 | + checksumConsumer.accept(checksum); |
| 157 | + latch.countDown(); |
| 158 | + } catch (IOException e) { |
| 159 | + throw new RemoteStateTransferException("Failed to execute checksum task", e); |
| 160 | + } |
| 161 | + }); |
| 162 | + } |
| 163 | + |
| 164 | + private long createChecksum(CheckedFunction<BufferedChecksumStreamOutput, Void, IOException> task) throws IOException { |
| 165 | + try ( |
| 166 | + BytesStreamOutput out = new BytesStreamOutput(); |
| 167 | + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out) |
| 168 | + ) { |
| 169 | + task.apply(checksumOut); |
| 170 | + return checksumOut.getChecksum(); |
| 171 | + } |
124 | 172 | } |
125 | 173 |
|
126 | 174 | private void createClusterStateChecksum() { |
|
0 commit comments