Skip to content

Commit 1636d3c

Browse files
zshuyi authored and amberzsy committed
Skip zone awareness when auto-expand set to all
Signed-off-by: amberzsy <[email protected]>
1 parent 12ff5ed commit 1636d3c

File tree

5 files changed

+110
-0
lines changed

5 files changed

+110
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
88
- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637))
99
- [Workload Management] QueryGroup resource cancellation framework changes ([#15651](https://github.com/opensearch-project/OpenSearch/pull/15651))
1010
- Fallback to Remote cluster-state on Term-Version check mismatch - ([#15424](https://github.com/opensearch-project/OpenSearch/pull/15424))
11+
- Skip zone awareness when auto-expand set to all - ([#14619](https://github.com/opensearch-project/OpenSearch/pull/14619))
1112

1213
### Dependencies
1314
- Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578))

server/src/internalClusterTest/java/org/opensearch/cluster/allocation/AwarenessAllocationIT.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,4 +504,68 @@ public void testThreeZoneOneReplicaWithForceZoneValueAndLoadAwareness() throws E
504504
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2 * numOfShards * (numOfReplica + 1)));
505505
assertThat(health.isTimedOut(), equalTo(false));
506506
}
507+
508+
public void testAwarenessZonesWithAutoExpand() {
509+
Settings commonSettings = Settings.builder()
510+
.put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), true)
511+
.put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "zone.values", "a")
512+
.put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone")
513+
.build();
514+
515+
logger.info("--> starting 2 nodes on same zone");
516+
List<String> nodes = internalCluster().startNodes(
517+
Settings.builder().put(commonSettings).put("node.attr.zone", "a").build(),
518+
Settings.builder().put(commonSettings).put("node.attr.zone", "a").build()
519+
);
520+
String A = nodes.get(0);
521+
String B = nodes.get(1);
522+
523+
logger.info("--> waiting for nodes to form a cluster");
524+
ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForNodes("2").execute().actionGet();
525+
assertThat(health.isTimedOut(), equalTo(false));
526+
527+
createIndex(
528+
"test",
529+
Settings.builder()
530+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2)
531+
.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-all")
532+
.build()
533+
);
534+
535+
if (randomBoolean()) {
536+
assertAcked(client().admin().indices().prepareClose("test"));
537+
}
538+
539+
logger.info("--> waiting for shards to be allocated");
540+
health = client().admin()
541+
.cluster()
542+
.prepareHealth()
543+
.setIndices("test")
544+
.setWaitForEvents(Priority.LANGUID)
545+
.setWaitForGreenStatus()
546+
.setWaitForNoRelocatingShards(true)
547+
.execute()
548+
.actionGet();
549+
assertThat(health.isTimedOut(), equalTo(false));
550+
551+
ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
552+
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(4));
553+
554+
final Map<String, Integer> counts = new HashMap<>();
555+
int replicaCount = 0;
556+
557+
for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
558+
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
559+
for (ShardRouting shardRouting : indexShardRoutingTable) {
560+
if (shardRouting.primary()) {
561+
replicaCount++;
562+
}
563+
counts.merge(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1, Integer::sum);
564+
}
565+
}
566+
}
567+
assertThat(counts.get(A), anyOf(equalTo(1), equalTo(2)));
568+
assertThat(counts.get(B), anyOf(equalTo(1), equalTo(2)));
569+
assertThat(replicaCount, equalTo(2));
570+
}
507571
}

server/src/main/java/org/opensearch/cluster/metadata/AutoExpandReplicas.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ public boolean isEnabled() {
140140
return enabled;
141141
}
142142

143+
public boolean autoExpandToAll() {
144+
return maxReplicas == Integer.MAX_VALUE;
145+
}
146+
143147
private OptionalInt getDesiredNumberOfReplicas(IndexMetadata indexMetadata, RoutingAllocation allocation) {
144148
if (enabled) {
145149
int numMatchingDataNodes = 0;

server/src/main/java/org/opensearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import java.util.function.Function;
5151

5252
import static java.util.Collections.emptyList;
53+
import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_AUTO_EXPAND_REPLICAS_SETTING;
5354

5455
/**
5556
* This {@link AllocationDecider} controls shard allocation based on
@@ -160,6 +161,11 @@ private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, Rout
160161
}
161162

162163
IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index());
164+
165+
if (INDEX_AUTO_EXPAND_REPLICAS_SETTING.get(indexMetadata.getSettings()).autoExpandToAll()) {
166+
return allocation.decision(Decision.YES, NAME, "allocation awareness is ignored, this index is set to auto-expand to all");
167+
}
168+
163169
int shardCount = indexMetadata.getNumberOfReplicas() + 1; // 1 for primary
164170
for (String awarenessAttribute : awarenessAttributes) {
165171
// the node the shard exists on must be associated with an awareness attribute.

server/src/test/java/org/opensearch/cluster/routing/allocation/AwarenessAllocationTests.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,4 +1063,39 @@ public void testAllocationExplainForUnassignedShardsWithUnbalancedZones() {
10631063

10641064
}
10651065
}
1066+
1067+
public void testIgnoredByAutoExpandReplicasToAll() {
1068+
final Settings settings = Settings.builder()
1069+
.put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone")
1070+
.build();
1071+
1072+
final AllocationService strategy = createAllocationService(settings);
1073+
1074+
final IndexMetadata.Builder metadataBuilder = IndexMetadata.builder("test")
1075+
.settings(
1076+
settings(Version.CURRENT).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
1077+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 100)
1078+
.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-all")
1079+
);
1080+
1081+
final Metadata metadata = Metadata.builder().put(metadataBuilder).build();
1082+
1083+
final DiscoveryNodes nodes = DiscoveryNodes.builder()
1084+
.add(newNode("A-0", singletonMap("zone", "a")))
1085+
.add(newNode("A-1", singletonMap("zone", "a")))
1086+
.add(newNode("A-2", singletonMap("zone", "a")))
1087+
.add(newNode("B-0", singletonMap("zone", "b")))
1088+
.build();
1089+
1090+
final ClusterState clusterState = applyStartedShardsUntilNoChange(
1091+
ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(Settings.EMPTY))
1092+
.metadata(metadata)
1093+
.routingTable(RoutingTable.builder().addAsNew(metadata.index("test")).build())
1094+
.nodes(nodes)
1095+
.build(),
1096+
strategy
1097+
);
1098+
1099+
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(0));
1100+
}
10661101
}

0 commit comments

Comments
 (0)