Skip to content

Commit 2f9f8e1

Browse files
authored
Fix for bug showing incorrect awareness attributes count in AwarenessAllocationDecider (#3428)
* Fix for bug showing incorrect awareness attributes count in AwarenessAllocationDecider Signed-off-by: Anshu Agarwal <[email protected]>
1 parent cce0781 commit 2f9f8e1

File tree

2 files changed

+101
-4
lines changed

2 files changed

+101
-4
lines changed

server/src/main/java/org/opensearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,14 @@
3333
package org.opensearch.cluster.routing.allocation.decider;
3434

3535
import java.util.HashMap;
36+
import java.util.HashSet;
3637
import java.util.List;
3738
import java.util.Map;
39+
import java.util.Set;
3840
import java.util.function.Function;
3941

4042
import com.carrotsearch.hppc.ObjectIntHashMap;
43+
import com.carrotsearch.hppc.cursors.ObjectCursor;
4144
import org.opensearch.cluster.metadata.IndexMetadata;
4245
import org.opensearch.cluster.routing.RoutingNode;
4346
import org.opensearch.cluster.routing.ShardRouting;
@@ -207,12 +210,14 @@ private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, Rout
207210

208211
int numberOfAttributes = nodesPerAttribute.size();
209212
List<String> fullValues = forcedAwarenessAttributes.get(awarenessAttribute);
213+
210214
if (fullValues != null) {
211-
for (String fullValue : fullValues) {
212-
if (shardPerAttribute.containsKey(fullValue) == false) {
213-
numberOfAttributes++;
214-
}
215+
// If forced awareness is enabled, numberOfAttributes = count(distinct((union(discovered_attributes, forced_attributes)))
216+
Set<String> attributesSet = new HashSet<>(fullValues);
217+
for (ObjectCursor<String> stringObjectCursor : nodesPerAttribute.keys()) {
218+
attributesSet.add(stringObjectCursor.value);
215219
}
220+
numberOfAttributes = attributesSet.size();
216221
}
217222
// TODO should we remove ones that are not part of full list?
218223

server/src/test/java/org/opensearch/cluster/routing/allocation/AwarenessAllocationTests.java

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,32 @@
3535
import org.apache.logging.log4j.LogManager;
3636
import org.apache.logging.log4j.Logger;
3737
import org.opensearch.Version;
38+
import org.opensearch.cluster.ClusterName;
3839
import org.opensearch.cluster.ClusterState;
3940
import org.opensearch.cluster.OpenSearchAllocationTestCase;
4041
import org.opensearch.cluster.metadata.IndexMetadata;
4142
import org.opensearch.cluster.metadata.Metadata;
4243
import org.opensearch.cluster.node.DiscoveryNodes;
44+
import org.opensearch.cluster.routing.RoutingNode;
45+
import org.opensearch.cluster.routing.RoutingNodes;
4346
import org.opensearch.cluster.routing.RoutingTable;
4447
import org.opensearch.cluster.routing.ShardRouting;
4548
import org.opensearch.cluster.routing.ShardRoutingState;
4649
import org.opensearch.cluster.routing.allocation.command.AllocationCommands;
4750
import org.opensearch.cluster.routing.allocation.command.CancelAllocationCommand;
4851
import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand;
52+
import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders;
4953
import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider;
5054
import org.opensearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider;
55+
import org.opensearch.cluster.routing.allocation.decider.Decision;
5156
import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
57+
58+
import org.opensearch.common.settings.ClusterSettings;
5259
import org.opensearch.common.settings.Settings;
5360

61+
import java.util.Collections;
5462
import java.util.HashMap;
63+
import java.util.List;
5564
import java.util.Map;
5665

5766
import static java.util.Collections.singletonMap;
@@ -971,4 +980,87 @@ public void testMultipleAwarenessAttributes() {
971980
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
972981
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));
973982
}
983+
984+
public void testAllocationExplainForUnassignedShardsWithUnbalancedZones() {
985+
Settings settings = Settings.builder()
986+
.put("cluster.routing.allocation.node_concurrent_recoveries", 10)
987+
.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), 10)
988+
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
989+
.put("cluster.routing.allocation.awareness.attributes", "zone")
990+
.put("cluster.routing.allocation.awareness.force.zone.values", "a,b,c")
991+
.build();
992+
993+
AllocationService strategy = createAllocationService(settings);
994+
995+
logger.info("Building initial routing table for 'testAllocationExplainForUnassignedShardsWithUnbalancedZones'");
996+
Metadata metadata = Metadata.builder()
997+
.put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(2))
998+
.build();
999+
1000+
RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
1001+
1002+
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
1003+
.metadata(metadata)
1004+
.routingTable(initialRoutingTable)
1005+
.build();
1006+
1007+
logger.info("--> adding 3 nodes in different zones and do rerouting");
1008+
clusterState = ClusterState.builder(clusterState)
1009+
.nodes(
1010+
DiscoveryNodes.builder()
1011+
.add(newNode("A-0", singletonMap("zone", "a")))
1012+
.add(newNode("A-1", singletonMap("zone", "a")))
1013+
.add(newNode("B-0", singletonMap("zone", "b")))
1014+
)
1015+
.build();
1016+
clusterState = strategy.reroute(clusterState, "reroute");
1017+
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0));
1018+
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
1019+
1020+
logger.info("--> start the shard (primary)");
1021+
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
1022+
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
1023+
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
1024+
// One Shard is unassigned due to forced zone awareness
1025+
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));
1026+
1027+
List<ShardRouting> unassignedShards = clusterState.getRoutingTable().shardsWithState(UNASSIGNED);
1028+
1029+
ClusterSettings EMPTY_CLUSTER_SETTINGS = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
1030+
// Add a new node in zone c
1031+
clusterState = ClusterState.builder(clusterState)
1032+
.nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("C-0", singletonMap("zone", "c"))))
1033+
.build();
1034+
1035+
final AwarenessAllocationDecider decider = new AwarenessAllocationDecider(settings, EMPTY_CLUSTER_SETTINGS);
1036+
1037+
final RoutingAllocation allocation = new RoutingAllocation(
1038+
new AllocationDeciders(Collections.singleton(decider)),
1039+
clusterState.getRoutingNodes(),
1040+
clusterState,
1041+
null,
1042+
null,
1043+
0L
1044+
);
1045+
allocation.debugDecision(true);
1046+
1047+
Decision decision = null;
1048+
RoutingNodes nodes = clusterState.getRoutingNodes();
1049+
1050+
for (RoutingNode node : nodes) {
1051+
// Try to allocate unassigned shard to A-0, fails because of forced zone awareness
1052+
if (node.nodeId().equals("A-0")) {
1053+
decision = decider.canAllocate(unassignedShards.get(0), node, allocation);
1054+
assertEquals(Decision.Type.NO, decision.type());
1055+
assertEquals(
1056+
decision.getExplanation(),
1057+
"there are too many copies of the shard allocated to nodes with attribute"
1058+
+ " [zone], there are [3] total configured shard copies for this shard id and [3]"
1059+
+ " total attribute values, expected the allocated shard count per attribute [2] to"
1060+
+ " be less than or equal to the upper bound of the required number of shards per attribute [1]"
1061+
);
1062+
}
1063+
1064+
}
1065+
}
9741066
}

0 commit comments

Comments
 (0)