Skip to content

Commit 0b9efff

Browse files
authored
Zen2: Persist cluster states the old way on non-master-eligible nodes (#36247)
The shard deletion logic (triggered by IndicesStore), which also leads to index metadata deletion on non-master-eligible data nodes, currently races against the new cluster state persistence logic that is triggered when a cluster state is accepted. One thread writes the index metadata while another thread deletes it, which causes exceptions and tripped assertions (see below). This PR moves cluster state persistence on non-master-eligible nodes back to the cluster applier service, just as it was in Zen1. This ensures that the index metadata deletion logic, which is triggered by the shard deletion logic, runs on the same thread that persists the cluster state.
1 parent 42457b5 commit 0b9efff

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,13 @@ public void applyClusterState(ClusterChangedEvent event) {
205205
}
206206

207207
try {
208+
// Hack: This is to ensure that non-master-eligible Zen2 nodes always store a current term
209+
// that's higher than the last accepted term.
210+
// TODO: can we get rid of this hack?
211+
if (event.state().term() > getCurrentTerm()) {
212+
innerSetCurrentTerm(event.state().term());
213+
}
214+
208215
updateClusterState(event.state(), event.previousState());
209216
incrementalWrite = true;
210217
} catch (WriteStateException e) {
@@ -225,17 +232,21 @@ public ClusterState getLastAcceptedState() {
225232

226233
@Override
227234
public void setCurrentTerm(long currentTerm) {
228-
Manifest manifest = new Manifest(currentTerm, previousManifest.getClusterStateVersion(), previousManifest.getGlobalGeneration(),
229-
new HashMap<>(previousManifest.getIndexGenerations()));
230235
try {
231-
metaStateService.writeManifestAndCleanup("current term changed", manifest);
232-
previousManifest = manifest;
236+
innerSetCurrentTerm(currentTerm);
233237
} catch (WriteStateException e) {
234238
logger.warn("Exception occurred when setting current term", e);
235239
//TODO re-throw exception
236240
}
237241
}
238242

243+
private void innerSetCurrentTerm(long currentTerm) throws WriteStateException {
244+
Manifest manifest = new Manifest(currentTerm, previousManifest.getClusterStateVersion(), previousManifest.getGlobalGeneration(),
245+
new HashMap<>(previousManifest.getIndexGenerations()));
246+
metaStateService.writeManifestAndCleanup("current term changed", manifest);
247+
previousManifest = manifest;
248+
}
249+
239250
@Override
240251
public void setLastAcceptedState(ClusterState clusterState) {
241252
try {

test/framework/src/main/java/org/elasticsearch/test/discovery/TestZenDiscovery.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@
2121

2222
import org.elasticsearch.cluster.coordination.CoordinationState;
2323
import org.elasticsearch.cluster.coordination.Coordinator;
24+
import org.elasticsearch.cluster.coordination.InMemoryPersistedState;
25+
import org.elasticsearch.cluster.node.DiscoveryNode;
2426
import org.elasticsearch.cluster.routing.allocation.AllocationService;
2527
import org.elasticsearch.cluster.service.ClusterApplier;
28+
import org.elasticsearch.cluster.service.ClusterApplierService;
2629
import org.elasticsearch.cluster.service.MasterService;
2730
import org.elasticsearch.common.Randomness;
2831
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
@@ -80,6 +83,12 @@ public Map<String, Supplier<Discovery>> getDiscoveryTypes(ThreadPool threadPool,
8083
if (USE_ZEN2.get(settings)) {
8184
Supplier<CoordinationState.PersistedState> persistedStateSupplier = () -> {
8285
gatewayMetaState.applyClusterStateUpdaters();
86+
if (DiscoveryNode.isMasterNode(settings) == false) {
87+
// use Zen1 way of writing cluster state for non-master-eligible nodes
88+
// this avoids concurrent manipulating of IndexMetadata with IndicesStore
89+
((ClusterApplierService) clusterApplier).addLowPriorityApplier(gatewayMetaState);
90+
return new InMemoryPersistedState(gatewayMetaState.getCurrentTerm(), gatewayMetaState.getLastAcceptedState());
91+
}
8392
return gatewayMetaState;
8493
};
8594

0 commit comments

Comments
 (0)