Skip to content

Commit bc1dd06

Browse files
committed
Optimising AwarenessAllocationDecider for hashmap.get call
Signed-off-by: RS146BIJAY <[email protected]>
1 parent 29a3e2c commit bc1dd06

File tree

5 files changed

+1200
-31
lines changed

5 files changed

+1200
-31
lines changed

server/src/internalClusterTest/java/org/opensearch/cluster/allocation/AwarenessAllocationIT.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,74 @@ public void testSimpleAwareness() throws Exception {
139139
}, 10, TimeUnit.SECONDS);
140140
}
141141

142+
public void testSimpleAwarenessWithZoneOptimised() throws Exception {
143+
Settings commonSettings = Settings.builder()
144+
.put("cluster.routing.allocation.awareness.attributes", "zone")
145+
.put("cluster.routing.allocation.awareness.zone_optimised", true)
146+
.build();
147+
148+
logger.info("--> starting 2 nodes on the same rack");
149+
internalCluster().startNodes(2, Settings.builder().put(commonSettings).put("node.attr.zone", "a").build());
150+
151+
Settings settings = Settings.builder()
152+
.put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), false)
153+
.build();
154+
ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest();
155+
updateSettingsRequest.persistentSettings(settings);
156+
157+
createIndex("test1");
158+
createIndex("test2");
159+
160+
NumShards test1 = getNumShards("test1");
161+
NumShards test2 = getNumShards("test2");
162+
// no replicas will be allocated as both indices end up on a single node
163+
final int totalPrimaries = test1.numPrimaries + test2.numPrimaries;
164+
165+
ensureGreen();
166+
167+
final List<String> indicesToClose = randomSubsetOf(Arrays.asList("test1", "test2"));
168+
indicesToClose.forEach(indexToClose -> assertAcked(client().admin().indices().prepareClose(indexToClose).get()));
169+
170+
logger.info("--> starting 1 node on a different rack");
171+
final String node3 = internalCluster().startNode(Settings.builder().put(commonSettings).put("node.attr.zone", "b").build());
172+
173+
// On slow machines the initial relocation might be delayed
174+
assertBusy(() -> {
175+
logger.info("--> waiting for no relocation");
176+
ClusterHealthResponse clusterHealth = client().admin()
177+
.cluster()
178+
.prepareHealth()
179+
.setIndices("test1", "test2")
180+
.setWaitForEvents(Priority.LANGUID)
181+
.setWaitForGreenStatus()
182+
.setWaitForNodes("3")
183+
.setWaitForNoRelocatingShards(true)
184+
.get();
185+
186+
assertThat("Cluster health request timed out", clusterHealth.isTimedOut(), equalTo(false));
187+
188+
logger.info("--> checking current state");
189+
ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
190+
191+
// check that closed indices are effectively closed
192+
final List<String> notClosedIndices = indicesToClose.stream()
193+
.filter(index -> clusterState.metadata().index(index).getState() != State.CLOSE)
194+
.collect(Collectors.toList());
195+
assertThat("Some indices not closed", notClosedIndices, empty());
196+
197+
// verify that we have all the primaries on node3
198+
final Map<String, Integer> counts = new HashMap<>();
199+
for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
200+
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
201+
for (ShardRouting shardRouting : indexShardRoutingTable) {
202+
counts.merge(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1, Integer::sum);
203+
}
204+
}
205+
}
206+
assertThat(counts.get(node3), equalTo(totalPrimaries));
207+
}, 10, TimeUnit.SECONDS);
208+
}
209+
142210
public void testAwarenessZones() {
143211
Settings commonSettings = Settings.builder()
144212
.put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "zone.values", "a,b")

server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ public static boolean isDedicatedSearchNode(Settings settings) {
143143
private final Map<String, String> attributes;
144144
private final Version version;
145145
private final SortedSet<DiscoveryNodeRole> roles;
146+
private final String zoneValue;
147+
private final boolean hasZoneAttribute;
146148

147149
/**
148150
* Creates a new {@link DiscoveryNode}
@@ -268,6 +270,8 @@ public DiscoveryNode(
268270
this.version = version;
269271
}
270272
this.attributes = Collections.unmodifiableMap(attributes);
273+
this.zoneValue = this.attributes.get("zone");
274+
this.hasZoneAttribute = attributes.containsKey("zone");
271275
// verify that no node roles are being provided as attributes
272276
Predicate<Map<String, String>> predicate = (attrs) -> {
273277
boolean success = true;
@@ -329,6 +333,9 @@ public DiscoveryNode(StreamInput in) throws IOException {
329333
for (int i = 0; i < size; i++) {
330334
this.attributes.put(in.readString(), in.readString());
331335
}
336+
337+
this.zoneValue = this.attributes.get("zone");
338+
this.hasZoneAttribute = attributes.containsKey("zone");
332339
int rolesSize = in.readVInt();
333340
final Set<DiscoveryNodeRole> roles = new HashSet<>(rolesSize);
334341
for (int i = 0; i < rolesSize; i++) {
@@ -458,6 +465,14 @@ public boolean isRemoteClusterClient() {
458465
return roles.contains(DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE);
459466
}
460467

468+
public String getZoneValue() {
469+
return zoneValue;
470+
}
471+
472+
public boolean hasZoneAttribute() {
473+
return hasZoneAttribute;
474+
}
475+
461476
/**
462477
* Returns whether the node is dedicated to provide search capability.
463478
*

server/src/main/java/org/opensearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java

Lines changed: 78 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,23 @@ public class AwarenessAllocationDecider extends AllocationDecider {
110110
Property.NodeScope
111111
);
112112

113+
public static final Setting<Boolean> CLUSTER_ROUTING_ALLOCATION_AWARENESS_ZONE_OPTIMISED = Setting.boolSetting(
114+
"cluster.routing.allocation.awareness.zone_optimised",
115+
false,
116+
Setting.Property.Dynamic,
117+
Setting.Property.NodeScope
118+
);
119+
113120
private volatile List<String> awarenessAttributes;
114121

122+
private volatile boolean isAllocationZoneOptimised;
115123
private volatile Map<String, List<String>> forcedAwarenessAttributes;
116124

117125
public AwarenessAllocationDecider(Settings settings, ClusterSettings clusterSettings) {
118126
this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings);
119127
clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, this::setAwarenessAttributes);
128+
this.isAllocationZoneOptimised = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ZONE_OPTIMISED.get(settings);
129+
clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ZONE_OPTIMISED, this::setAllocationZoneOptimised);
120130
setForcedAwarenessAttributes(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.get(settings));
121131
clusterSettings.addSettingsUpdateConsumer(
122132
CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING,
@@ -140,6 +150,10 @@ private void setAwarenessAttributes(List<String> awarenessAttributes) {
140150
this.awarenessAttributes = awarenessAttributes;
141151
}
142152

153+
private void setAllocationZoneOptimised(boolean isAllocationZoneOptimised) {
154+
this.isAllocationZoneOptimised = isAllocationZoneOptimised;
155+
}
156+
143157
@Override
144158
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
145159
return underCapacity(shardRouting, node, allocation, true);
@@ -164,7 +178,7 @@ private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, Rout
164178
int shardCount = indexMetadata.getNumberOfReplicas() + 1; // 1 for primary
165179
for (String awarenessAttribute : awarenessAttributes) {
166180
// the node the shard exists on must be associated with an awareness attribute
167-
if (node.node().getAttributes().containsKey(awarenessAttribute) == false) {
181+
if (isAwarenessAttributeAssociatedWithNode(node, awarenessAttribute) == false) {
168182
return allocation.decision(
169183
Decision.NO,
170184
NAME,
@@ -175,36 +189,10 @@ private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, Rout
175189
);
176190
}
177191

192+
int currentNodeCount = getCurrentNodeCountForAttribute(shardRouting, node, allocation, moveToNode, awarenessAttribute);
193+
178194
// build attr_value -> nodes map
179195
Set<String> nodesPerAttribute = allocation.routingNodes().nodesPerAttributesCounts(awarenessAttribute);
180-
181-
// build the count of shards per attribute value
182-
Map<String, Integer> shardPerAttribute = new HashMap<>();
183-
for (ShardRouting assignedShard : allocation.routingNodes().assignedShards(shardRouting.shardId())) {
184-
if (assignedShard.started() || assignedShard.initializing()) {
185-
// Note: this also counts relocation targets as that will be the new location of the shard.
186-
// Relocation sources should not be counted as the shard is moving away
187-
RoutingNode routingNode = allocation.routingNodes().node(assignedShard.currentNodeId());
188-
shardPerAttribute.merge(routingNode.node().getAttributes().get(awarenessAttribute), 1, Integer::sum);
189-
}
190-
}
191-
192-
if (moveToNode) {
193-
if (shardRouting.assignedToNode()) {
194-
String nodeId = shardRouting.relocating() ? shardRouting.relocatingNodeId() : shardRouting.currentNodeId();
195-
if (node.nodeId().equals(nodeId) == false) {
196-
// we work on different nodes, move counts around
197-
shardPerAttribute.compute(
198-
allocation.routingNodes().node(nodeId).node().getAttributes().get(awarenessAttribute),
199-
(k, v) -> (v == null) ? 0 : v - 1
200-
);
201-
shardPerAttribute.merge(node.node().getAttributes().get(awarenessAttribute), 1, Integer::sum);
202-
}
203-
} else {
204-
shardPerAttribute.merge(node.node().getAttributes().get(awarenessAttribute), 1, Integer::sum);
205-
}
206-
}
207-
208196
int numberOfAttributes = nodesPerAttribute.size();
209197
List<String> fullValues = forcedAwarenessAttributes.get(awarenessAttribute);
210198

@@ -216,9 +204,8 @@ private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, Rout
216204
}
217205
numberOfAttributes = attributesSet.size();
218206
}
219-
// TODO should we remove ones that are not part of full list?
220207

221-
final int currentNodeCount = shardPerAttribute.get(node.node().getAttributes().get(awarenessAttribute));
208+
// TODO should we remove ones that are not part of full list?
222209
final int maximumNodeCount = (shardCount + numberOfAttributes - 1) / numberOfAttributes; // ceil(shardCount/numberOfAttributes)
223210
if (currentNodeCount > maximumNodeCount) {
224211
return allocation.decision(
@@ -238,4 +225,64 @@ private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, Rout
238225

239226
return allocation.decision(Decision.YES, NAME, "node meets all awareness attribute requirements");
240227
}
228+
229+
private int getCurrentNodeCountForAttribute(
230+
ShardRouting shardRouting,
231+
RoutingNode node,
232+
RoutingAllocation allocation,
233+
boolean moveToNode,
234+
String awarenessAttribute
235+
) {
236+
// build the count of shards per attribute value
237+
final String shardAttributeForNode = getAttributeValueForNode(node, awarenessAttribute);
238+
int currentNodeCount = 0;
239+
final List<ShardRouting> assignedShards = allocation.routingNodes().assignedShards(shardRouting.shardId());
240+
for (ShardRouting assignedShard : assignedShards) {
241+
if (assignedShard.started() || assignedShard.initializing()) {
242+
// Note: this also counts relocation targets as that will be the new location of the shard.
243+
// Relocation sources should not be counted as the shard is moving away
244+
RoutingNode routingNode = allocation.routingNodes().node(assignedShard.currentNodeId());
245+
// Increase node count when
246+
if (getAttributeValueForNode(routingNode, awarenessAttribute).equals(shardAttributeForNode)) {
247+
++currentNodeCount;
248+
}
249+
}
250+
}
251+
252+
if (moveToNode) {
253+
if (shardRouting.assignedToNode()) {
254+
String nodeId = shardRouting.relocating() ? shardRouting.relocatingNodeId() : shardRouting.currentNodeId();
255+
if (node.nodeId().equals(nodeId) == false) {
256+
// we work on different nodes, move counts around
257+
if (getAttributeValueForNode(allocation.routingNodes().node(nodeId), awarenessAttribute).equals(shardAttributeForNode)
258+
&& currentNodeCount > 0) {
259+
--currentNodeCount;
260+
}
261+
262+
++currentNodeCount;
263+
}
264+
} else {
265+
++currentNodeCount;
266+
}
267+
}
268+
269+
return currentNodeCount;
270+
}
271+
272+
private boolean isAwarenessAttributeAssociatedWithNode(RoutingNode node, String awarenessAttribute) {
273+
if (isAllocationZoneOptimised) {
274+
return node.node().hasZoneAttribute();
275+
} else {
276+
return node.node().getAttributes().containsKey(awarenessAttribute);
277+
}
278+
}
279+
280+
private String getAttributeValueForNode(final RoutingNode node, final String awarenessAttribute) {
281+
if (isAllocationZoneOptimised) {
282+
return node.node().getZoneValue();
283+
} else {
284+
return node.node().getAttributes().get(awarenessAttribute);
285+
}
286+
}
287+
241288
}

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ public void apply(Settings value, Settings current, Settings previous) {
257257
new HashSet<>(
258258
Arrays.asList(
259259
AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING,
260+
AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ZONE_OPTIMISED,
260261
AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING,
261262
AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING,
262263
BalancedShardsAllocator.INDEX_BALANCE_FACTOR_SETTING,

0 commit comments

Comments
 (0)