Skip to content

Commit 6e5bbd4

Browse files
author
Andrey Ershov
committed
Exclude node from voting configuration when restarting it
1 parent 830def1 commit 6e5bbd4

File tree

2 files changed

+60
-51
lines changed

2 files changed

+60
-51
lines changed

server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -332,12 +332,10 @@ public boolean clearData(String nodeName) {
332332
*/
333333
public void testIndexDeletionWhenNodeRejoins() throws Exception {
334334
final String indexName = "test-index-del-on-node-rejoin-idx";
335-
// We need at least 3 nodes to make sure, that once one node is stopped, remaining nodes can elect a new master
336-
final int numNodes = 3;
335+
final int numNodes = 2;
337336

338337
final List<String> nodes;
339338
logger.info("--> starting a cluster with " + numNodes + " nodes");
340-
341339
nodes = internalCluster().startNodes(numNodes,
342340
Settings.builder().put(IndexGraveyard.SETTING_MAX_TOMBSTONES.getKey(), randomIntBetween(10, 100)).build());
343341
logger.info("--> create an index");

test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java

Lines changed: 59 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,35 +1636,7 @@ private synchronized void stopNodesAndClient(NodeAndClient nodeAndClient) throws
16361636
}
16371637

16381638
private synchronized void stopNodesAndClients(Collection<NodeAndClient> nodeAndClients) throws IOException {
1639-
final Set<String> excludedNodeIds = new HashSet<>();
1640-
1641-
if (autoManageMinMasterNodes && nodeAndClients.size() > 0) {
1642-
1643-
final long currentMasters = nodes.values().stream().filter(NodeAndClient::isMasterEligible).count();
1644-
final long stoppingMasters = nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).count();
1645-
1646-
assert stoppingMasters <= currentMasters : currentMasters + " < " + stoppingMasters;
1647-
if (stoppingMasters != currentMasters && stoppingMasters > 0) {
1648-
// If stopping few enough master-nodes that there's still a majority left, there is no need to withdraw their votes first.
1649-
// However, we do not yet have a way to be sure there's a majority left, because the voting configuration may not yet have
1650-
// been updated when the previous nodes shut down, so we must always explicitly withdraw votes.
1651-
// TODO add cluster health API to check that voting configuration is optimal so this isn't always needed
1652-
nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).map(NodeAndClient::getName).forEach(excludedNodeIds::add);
1653-
assert excludedNodeIds.size() == stoppingMasters;
1654-
1655-
logger.info("adding voting config exclusions {} prior to shutdown", excludedNodeIds);
1656-
try {
1657-
client().execute(AddVotingConfigExclusionsAction.INSTANCE,
1658-
new AddVotingConfigExclusionsRequest(excludedNodeIds.toArray(new String[0]))).get();
1659-
} catch (InterruptedException | ExecutionException e) {
1660-
throw new AssertionError("unexpected", e);
1661-
}
1662-
}
1663-
1664-
if (stoppingMasters > 0) {
1665-
updateMinMasterNodes(getMasterNodesCount() - Math.toIntExact(stoppingMasters));
1666-
}
1667-
}
1639+
final Set<String> excludedNodeIds = excludeMasters(nodeAndClients);
16681640

16691641
for (NodeAndClient nodeAndClient: nodeAndClients) {
16701642
removeDisruptionSchemeFromNode(nodeAndClient);
@@ -1673,14 +1645,7 @@ private synchronized void stopNodesAndClients(Collection<NodeAndClient> nodeAndC
16731645
nodeAndClient.close();
16741646
}
16751647

1676-
if (excludedNodeIds.isEmpty() == false) {
1677-
logger.info("removing voting config exclusions for {} after shutdown", excludedNodeIds);
1678-
try {
1679-
client().execute(ClearVotingConfigExclusionsAction.INSTANCE, new ClearVotingConfigExclusionsRequest()).get();
1680-
} catch (InterruptedException | ExecutionException e) {
1681-
throw new AssertionError("unexpected", e);
1682-
}
1683-
}
1648+
removeExclusions(excludedNodeIds);
16841649
}
16851650

16861651
/**
@@ -1746,31 +1711,78 @@ public synchronized void rollingRestart(RestartCallback callback) throws Excepti
17461711

17471712
private void restartNode(NodeAndClient nodeAndClient, RestartCallback callback) throws Exception {
17481713
logger.info("Restarting node [{}] ", nodeAndClient.name);
1714+
17491715
if (activeDisruptionScheme != null) {
17501716
activeDisruptionScheme.removeFromNode(nodeAndClient.name, this);
17511717
}
1752-
final int masterNodesCount = getMasterNodesCount();
1753-
// special case to allow stopping one node in a two node cluster and keep it functional
1754-
final boolean updateMinMaster = nodeAndClient.isMasterEligible() && masterNodesCount == 2 && autoManageMinMasterNodes;
1755-
if (updateMinMaster) {
1756-
updateMinMasterNodes(masterNodesCount - 1);
1757-
}
1718+
1719+
Set<String> excludedNodeIds = excludeMasters(Collections.singleton(nodeAndClient));
1720+
17581721
final Settings newSettings = nodeAndClient.closeForRestart(callback,
1759-
autoManageMinMasterNodes ? getMinMasterNodes(masterNodesCount) : -1);
1722+
autoManageMinMasterNodes ? getMinMasterNodes(getMasterNodesCount()) : -1);
1723+
1724+
removeExclusions(excludedNodeIds);
1725+
17601726
nodeAndClient.recreateNode(newSettings, () -> rebuildUnicastHostFiles(emptyList()));
17611727
nodeAndClient.startNode();
17621728
if (activeDisruptionScheme != null) {
17631729
activeDisruptionScheme.applyToNode(nodeAndClient.name, this);
17641730
}
1765-
if (callback.validateClusterForming() || updateMinMaster) {
1731+
1732+
if (callback.validateClusterForming() && excludedNodeIds.isEmpty() == false) {
17661733
// we have to validate cluster size if updateMinMaster == true, because we need the
17671734
// second node to join in order to increment min_master_nodes back to 2.
17681735
// we also have to do via the node that was just restarted as it may be that the master didn't yet process
17691736
// the fact it left
17701737
validateClusterFormed(nodeAndClient.name);
17711738
}
1772-
if (updateMinMaster) {
1773-
updateMinMasterNodes(masterNodesCount);
1739+
1740+
if (excludedNodeIds.isEmpty() == false) {
1741+
updateMinMasterNodes(getMasterNodesCount());
1742+
}
1743+
}
1744+
1745+
private Set<String> excludeMasters(Collection<NodeAndClient> nodeAndClients) {
1746+
final Set<String> excludedNodeIds = new HashSet<>();
1747+
if (autoManageMinMasterNodes && nodeAndClients.size() > 0) {
1748+
1749+
final long currentMasters = nodes.values().stream().filter(NodeAndClient::isMasterEligible).count();
1750+
final long stoppingMasters = nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).count();
1751+
1752+
assert stoppingMasters <= currentMasters : currentMasters + " < " + stoppingMasters;
1753+
if (stoppingMasters != currentMasters && stoppingMasters > 0) {
1754+
// If stopping few enough master-nodes that there's still a majority left, there is no need to withdraw their votes first.
1755+
// However, we do not yet have a way to be sure there's a majority left, because the voting configuration may not yet have
1756+
// been updated when the previous nodes shut down, so we must always explicitly withdraw votes.
1757+
// TODO add cluster health API to check that voting configuration is optimal so this isn't always needed
1758+
nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).map(NodeAndClient::getName).forEach(excludedNodeIds::add);
1759+
assert excludedNodeIds.size() == stoppingMasters;
1760+
1761+
logger.info("adding voting config exclusions {} prior to restart/shutdown", excludedNodeIds);
1762+
try {
1763+
client().execute(AddVotingConfigExclusionsAction.INSTANCE,
1764+
new AddVotingConfigExclusionsRequest(excludedNodeIds.toArray(new String[0]))).get();
1765+
} catch (InterruptedException | ExecutionException e) {
1766+
throw new AssertionError("unexpected", e);
1767+
}
1768+
}
1769+
1770+
if (stoppingMasters > 0) {
1771+
updateMinMasterNodes(getMasterNodesCount() - Math.toIntExact(stoppingMasters));
1772+
}
1773+
}
1774+
return excludedNodeIds;
1775+
}
1776+
1777+
private void removeExclusions(Set<String> excludedNodeIds) {
1778+
if (excludedNodeIds.isEmpty() == false) {
1779+
logger.info("removing voting config exclusions for {} after restart/shutdown", excludedNodeIds);
1780+
try {
1781+
Client client = getRandomNodeAndClient(node -> excludedNodeIds.contains(node.name) == false).client(random);
1782+
client.execute(ClearVotingConfigExclusionsAction.INSTANCE, new ClearVotingConfigExclusionsRequest()).get();
1783+
} catch (InterruptedException | ExecutionException e) {
1784+
throw new AssertionError("unexpected", e);
1785+
}
17741786
}
17751787
}
17761788

@@ -1828,7 +1840,6 @@ public synchronized void fullRestart(RestartCallback callback) throws Exception
18281840
}
18291841
}
18301842

1831-
18321843
/**
18331844
* Returns the name of the current master node in the cluster.
18341845
*/

0 commit comments

Comments
 (0)