82 changes: 67 additions & 15 deletions pkg/resource/cluster/hook.go
@@ -184,6 +184,45 @@ func (rm *resourceManager) clusterInUse(ctx context.Context, r *resource) (bool,
return (nodes != nil && len(nodes.Nodegroups) > 0), nil
}

// isAutoModeCluster returns true if the resource carries a complete EKS Auto
// Mode configuration (compute, block storage, and load balancing all present).
// According to AWS documentation, these three capabilities must be enabled or
// disabled together; any partial configuration is invalid and yields an error.
func isAutoModeCluster(r *resource) (bool, error) {
if r == nil || r.ko == nil {
return false, nil
}

hasComputeConfig := r.ko.Spec.ComputeConfig != nil
hasStorageConfig := r.ko.Spec.StorageConfig != nil && r.ko.Spec.StorageConfig.BlockStorage != nil
hasELBConfig := r.ko.Spec.KubernetesNetworkConfig != nil && r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing != nil

// If no Auto Mode configuration is present, it's valid (not an Auto Mode cluster)
if !hasComputeConfig && !hasStorageConfig && !hasELBConfig {
return false, nil
}

// If any Auto Mode configuration is present, ALL must be present
if !hasComputeConfig || !hasStorageConfig || !hasELBConfig {
return false, fmt.Errorf("invalid Auto Mode configuration: when configuring Auto Mode, all three capabilities must be specified (compute=%v, storage=%v, elb=%v)",
hasComputeConfig, hasStorageConfig, hasELBConfig)
}

computeEnabled := r.ko.Spec.ComputeConfig.Enabled != nil && *r.ko.Spec.ComputeConfig.Enabled
storageEnabled := r.ko.Spec.StorageConfig.BlockStorage.Enabled != nil && *r.ko.Spec.StorageConfig.BlockStorage.Enabled
elbEnabled := r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled != nil && *r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled

// All three must be in the same state
if computeEnabled != storageEnabled || storageEnabled != elbEnabled {
return false, fmt.Errorf("invalid Auto Mode configuration: compute, block storage, and load balancing capabilities must all be enabled or disabled together (compute=%v, storage=%v, elb=%v)",
computeEnabled, storageEnabled, elbEnabled)
}

	// All three flags are in the same state at this point, so the resource
	// carries a complete Auto Mode configuration (fully enabled or fully disabled).
	return true, nil
}


func customPreCompare(
a *resource,
b *resource,
@@ -380,25 +419,38 @@ func (rm *resourceManager) customUpdate(
return returnClusterUpdating(updatedRes)
}

-	// Handle computeConfig updates
+	// Handle computeConfig updates - only for Auto Mode clusters
	if delta.DifferentAt("Spec.ComputeConfig") || delta.DifferentAt("Spec.StorageConfig") || delta.DifferentAt("Spec.KubernetesNetworkConfig") {
-		if err := rm.updateComputeConfig(ctx, desired); err != nil {
-			awsErr, ok := extractAWSError(err)
-			rlog.Info("attempting to update AutoMode config",
-				"error", err,
-				"isAWSError", ok,
-				"awsErrorCode", awsErr.Code)
-
-			// Check to see if we've raced an async update call and need to requeue
-			if ok && awsErr.Code == "ResourceInUseException" {
-				rlog.Info("resource in use, requeueing after async update")
-				return nil, requeueAfterAsyncUpdate()
-			}
-
-			return nil, fmt.Errorf("failed to update AutoMode config: %w", err)
-		}
-
-		return returnClusterUpdating(updatedRes)
+		// Validate the Auto Mode configuration and proceed only if the cluster is configured for Auto Mode
+		isAutoMode, err := isAutoModeCluster(desired)
+		if err != nil {
+			return nil, ackerr.NewTerminalError(err)
+		}
+		if isAutoMode {
Contributor: Q: If this is false due to a user removing the auto-mode flags, do we need to take any action? As-is we won't send any API request and will leave the EKS cluster with whatever values were already present.

Member (@rushmash91, Oct 9, 2025): Yup, if it's an invalid automode payload it's not sent. No action is taken apart from logging it.

Contributor: It seems like this could lead to some odd behavior from a user's perspective. Here's a sequence of events that could happen with this logic:

  1. User creates a cluster without any auto-mode configs set. The cluster created in EKS has auto-mode disabled, as expected.

  2. User adds auto-mode configs with all values true. The cluster is modified to use auto-mode, as expected.

  3. User decides they don't want auto-mode and rolls back the ACK resource to the original spec. We log that we are ignoring the diff for a non-auto-mode cluster. However, the actual cluster in EKS still has auto-mode enabled.

Member (@michaelhtm, Oct 9, 2025): Should we still make the API request? That would keep this change safe even if the API behavior changes. We can mark the encountered error terminal for now.
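As an illustration of that suggestion, here is a minimal, self-contained sketch in which the update is always attempted and a service-side validation failure is marked terminal. Every name below (updateComputeConfig, errInvalidParameter, the error text) is a stand-in, not the controller's actual code:

package main

import (
	"errors"
	"fmt"
)

// Stand-in for the error EKS would return for an incomplete payload.
var errInvalidParameter = errors.New("InvalidParameterException: incomplete Auto Mode configuration")

// updateComputeConfig simulates the EKS call: the service itself rejects an
// invalid payload.
func updateComputeConfig(validSpec bool) error {
	if !validSpec {
		return errInvalidParameter
	}
	return nil
}

// reconcile always attempts the update and surfaces a service-side
// validation failure as terminal instead of skipping the API call.
func reconcile(validSpec bool) error {
	if err := updateComputeConfig(validSpec); err != nil {
		if errors.Is(err, errInvalidParameter) {
			return fmt.Errorf("terminal: %w", err) // stand-in for ackerr.NewTerminalError
		}
		return err
	}
	return nil
}

func main() {
	fmt.Println(reconcile(true))  // <nil>
	fmt.Println(reconcile(false)) // terminal: InvalidParameterException: ...
}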

+			if err := rm.updateComputeConfig(ctx, desired); err != nil {
+				awsErr, ok := extractAWSError(err)
+				var awsErrorCode string
+				if ok && awsErr != nil {
+					awsErrorCode = awsErr.Code
+				}
+				rlog.Info("attempting to update AutoMode config",
+					"error", err,
+					"isAWSError", ok,
+					"awsErrorCode", awsErrorCode)
+
+				// Check to see if we've raced an async update call and need to requeue
+				if ok && awsErr != nil && awsErr.Code == "ResourceInUseException" {
+					rlog.Info("resource in use, requeueing after async update")
+					return nil, requeueAfterAsyncUpdate()
+				}
+
+				return nil, fmt.Errorf("failed to update AutoMode config: %w", err)
+			}
+
+			return returnClusterUpdating(updatedRes)
+		}
+
+		// If not Auto Mode, ignore the diff
+		rlog.Info("ignoring diff on compute/storage/network config for non-Auto Mode cluster")
Contributor: Q: Will this not result in the delta still being present in the next reconcile loop? It might be tough to avoid this if the API returns invalid auto-mode flag combinations, unless we treat nil as equal to false in the delta comparison.

Member: Yes, we will see the delta in the logs, but the update payload won't be sent. Any suggestions on what should be done instead?

Contributor: If we're treating nil as false in the validation logic, we could do the same in the delta comparison. That way a partially false set of flags won't register as a diff.

Contributor: We would need to validate that nil is equivalent to false in the EKS service as well, though.
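A minimal sketch of the nil-as-false comparison suggested above; equalBoolPtr is a hypothetical helper name, not part of this PR:

package main

import "fmt"

// equalBoolPtr treats a nil *bool as false, so an unset Auto Mode flag
// compares equal to an explicit false and would not register as a diff.
func equalBoolPtr(a, b *bool) bool {
	av := a != nil && *a
	bv := b != nil && *b
	return av == bv
}

func main() {
	f, t := false, true
	fmt.Println(equalBoolPtr(nil, &f)) // true: nil compares equal to false
	fmt.Println(equalBoolPtr(nil, &t)) // false: nil still differs from true
	fmt.Println(equalBoolPtr(&t, &t))  // true
}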

}

// Handle zonalShiftConfig updates
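To make the all-or-nothing rule concrete, here is a standalone sketch mirroring the decision table of isAutoModeCluster above; classify is illustrative only, not controller code:

package main

import "fmt"

// classify mirrors the presence check: the three Auto Mode sections must be
// configured all together or not at all.
func classify(hasCompute, hasStorage, hasELB bool) (bool, error) {
	switch {
	case !hasCompute && !hasStorage && !hasELB:
		return false, nil // no Auto Mode config: valid, not Auto Mode managed
	case hasCompute && hasStorage && hasELB:
		return true, nil // complete config: Auto Mode managed (enabled or disabled)
	default:
		return false, fmt.Errorf("invalid Auto Mode configuration: partial config (compute=%v, storage=%v, elb=%v)", hasCompute, hasStorage, hasELB)
	}
}

func main() {
	fmt.Println(classify(true, true, true))    // true <nil>
	fmt.Println(classify(true, false, true))   // false, error
	fmt.Println(classify(false, false, false)) // false <nil>
}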
175 changes: 170 additions & 5 deletions test/e2e/tests/test_cluster_automode.py
@@ -18,6 +18,7 @@
import logging
import time
import pytest
import json

from acktest.k8s import resource as k8s
from acktest.k8s import condition
@@ -33,9 +34,10 @@
from e2e.common.types import CLUSTER_RESOURCE_PLURAL
from e2e.common.waiter import wait_until_deleted
from e2e.replacement_values import REPLACEMENT_VALUES
from e2e.tests.test_cluster import simple_cluster

-MODIFY_WAIT_AFTER_SECONDS = 240
-CHECK_STATUS_WAIT_SECONDS = 240
+MODIFY_WAIT_AFTER_SECONDS = 60
+CHECK_STATUS_WAIT_SECONDS = 30


def wait_for_cluster_active(eks_client, cluster_name):
@@ -93,8 +95,13 @@ def auto_mode_cluster(eks_client):

yield (ref, cr)

-    pass
+    # Best-effort cleanup in case the cluster still exists
+    try:
+        _, deleted = k8s.delete_custom_resource(ref, 9, 10)
+        assert deleted
+        wait_until_deleted(cluster_name)
+    except Exception:
+        pass

@service_marker
@pytest.mark.canary
@@ -141,6 +148,164 @@ def test_create_auto_mode_cluster(self, eks_client, auto_mode_cluster):
time.sleep(CHECK_STATUS_WAIT_SECONDS)

# Clean up
-        _, deleted = k8s.delete_custom_resource(ref, 3, 10)
+        _, deleted = k8s.delete_custom_resource(ref, 9, 10)
assert deleted
wait_until_deleted(cluster_name)


@service_marker
@pytest.mark.canary
class TestAutoModeClusterUpdates:
def test_enable_auto_mode_on_standard_cluster(self, eks_client, simple_cluster):
(ref, cr) = simple_cluster
cluster_name = cr["spec"]["name"]

aws_res = eks_client.describe_cluster(name=cluster_name)
assert aws_res is not None

# Wait for the cluster to be ACTIVE and let controller refresh status
wait_for_cluster_active(eks_client, cluster_name)
time.sleep(CHECK_STATUS_WAIT_SECONDS)
get_and_assert_status(ref, "ACTIVE", True)

# Patch to enable auto-mode
patch_enable_auto_mode = {
"spec": {
"computeConfig": {"enabled": True},
"storageConfig": {"blockStorage": {"enabled": True}},
"kubernetesNetworkConfig": {
"elasticLoadBalancing": {"enabled": True},
"ipFamily": "ipv4",
},
}
}
k8s.patch_custom_resource(ref, patch_enable_auto_mode)
time.sleep(MODIFY_WAIT_AFTER_SECONDS)
get_and_assert_status(ref, "UPDATING", False)

# Wait for cluster to become active after update
wait_for_cluster_active(eks_client, cluster_name)
time.sleep(CHECK_STATUS_WAIT_SECONDS)
get_and_assert_status(ref, "ACTIVE", True)

# Verify auto-mode activation via EKS update history (since DescribeCluster may not reflect the fields immediately)
updates_summary = eks_client.list_updates(name=cluster_name)

update_ids = updates_summary.get("updateIds", [])
assert len(update_ids) == 1, (
f"Expected exactly 1 update, got {len(update_ids)}: {update_ids}"
)

update_id = update_ids[0]
upd_desc = eks_client.describe_update(name=cluster_name, updateId=update_id)

update_info = upd_desc["update"]

# Verify update type and status
assert update_info["type"] == "AutoModeUpdate", (
f"Expected AutoModeUpdate, got: {update_info['type']}"
)
assert update_info["status"] == "Successful", (
f"Expected Successful status, got: {update_info['status']}"
)

def test_disable_auto_mode_incorrectly(self, eks_client, auto_mode_cluster):
(ref, cr) = auto_mode_cluster
cluster_name = cr["spec"]["name"]

try:
aws_res = eks_client.describe_cluster(name=cluster_name)
assert aws_res is not None
except eks_client.exceptions.ResourceNotFoundException:
pytest.fail(f"Could not find cluster '{cluster_name}' in EKS")

wait_for_cluster_active(eks_client, cluster_name)
time.sleep(CHECK_STATUS_WAIT_SECONDS)
get_and_assert_status(ref, "ACTIVE", True)

# Patch with incorrect parameters to disable auto-mode
patch_disable_auto_mode_incorrectly = {
"spec": {
"computeConfig": {"enabled": False},
"storageConfig": {
"blockStorage": {
"enabled": True # Should be False
}
},
"kubernetesNetworkConfig": {"elasticLoadBalancing": {"enabled": False}},
}
}

k8s.patch_custom_resource(ref, patch_disable_auto_mode_incorrectly)
time.sleep(MODIFY_WAIT_AFTER_SECONDS)

# The controller should detect the invalid configuration and set a terminal condition.
terminal_condition = "ACK.Terminal"
cond = k8s.get_resource_condition(ref, terminal_condition)
if cond is None:
pytest.fail(
f"Failed to find {terminal_condition} condition in resource {ref}"
)

cond_status = cond.get("status", None)
if str(cond_status) != str(True):
pytest.fail(
f"Expected {terminal_condition} condition to have status True but found {cond_status}"
)

# Verify the error message contains information about invalid Auto Mode configuration
assert "invalid Auto Mode configuration" in cond.get("message", "")

def test_disable_auto_mode_correctly(self, eks_client, auto_mode_cluster):
(ref, cr) = auto_mode_cluster
cluster_name = cr["spec"]["name"]

try:
aws_res = eks_client.describe_cluster(name=cluster_name)
assert aws_res is not None
except eks_client.exceptions.ResourceNotFoundException:
pytest.fail(f"Could not find cluster '{cluster_name}' in EKS")

wait_for_cluster_active(eks_client, cluster_name)
time.sleep(CHECK_STATUS_WAIT_SECONDS)
get_and_assert_status(ref, "ACTIVE", True)

# Patch to disable auto-mode correctly
patch_disable_auto_mode = {
"spec": {
"computeConfig": {"enabled": False},
"storageConfig": {"blockStorage": {"enabled": False}},
"kubernetesNetworkConfig": {"elasticLoadBalancing": {"enabled": False}},
}
}

k8s.patch_custom_resource(ref, patch_disable_auto_mode)
        time.sleep(MODIFY_WAIT_AFTER_SECONDS)
get_and_assert_status(ref, "UPDATING", False)

wait_for_cluster_active(eks_client, cluster_name)
time.sleep(CHECK_STATUS_WAIT_SECONDS)
get_and_assert_status(ref, "ACTIVE", True)

# Verify auto-mode is disabled
aws_res = eks_client.describe_cluster(name=cluster_name)
compute_config = aws_res["cluster"].get("computeConfig")
if compute_config is not None:
assert compute_config.get("enabled") is False, (
f"computeConfig.enabled should be False or absent, got: {compute_config.get('enabled')}"
)

storage_config = aws_res["cluster"].get("storageConfig")
if storage_config is not None:
block_storage = storage_config.get("blockStorage", {})
if block_storage:
assert block_storage.get("enabled") is False, (
f"storageConfig.blockStorage.enabled should be False or absent, got: {block_storage.get('enabled')}"
)

k8s_network_config = aws_res["cluster"].get("kubernetesNetworkConfig", {})
elb_config = k8s_network_config.get("elasticLoadBalancing")
if elb_config is not None:
assert elb_config.get("enabled") is False, (
f"kubernetesNetworkConfig.elasticLoadBalancing.enabled should be False or absent, got: {elb_config.get('enabled')}"
)